Example #1
0
    def poll(self):
        """Search Twitter for new tweets matching the configured query and
        dispatch a trigger for each one, oldest first."""
        search_order = TwitterSearchOrder()
        search_order.set_keywords(self._config['query'])

        # Restrict by language only when one is configured.
        language = self._config.get('language', None)
        if language:
            search_order.set_language(language)

        search_order.set_result_type('recent')
        search_order.set_count(self._config.get('count', 30))
        search_order.set_include_entities(False)

        # Resume from the last tweet we processed, if any.
        last_id = self._get_last_id()
        if last_id:
            search_order.set_since_id(int(last_id))

        try:
            response = self._client.search_tweets(search_order)
            statuses = response['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        # The API returns newest-first; process oldest-first instead.
        statuses = list(reversed(statuses))

        if statuses:
            # Remember the newest tweet so the next poll only fetches newer ones.
            self._set_last_id(last_id=statuses[-1]['id'])

        for tweet in statuses:
            self._dispatch_trigger_for_tweet(tweet=tweet)
    def poll(self):
        """Run one polling cycle: query Twitter for recent tweets and fire a
        trigger per tweet, oldest first."""
        cfg = self._config
        query = TwitterSearchOrder()
        # NOTE(review): the second positional flag is forwarded to
        # TwitterSearchOrder.set_keywords (its or_operator parameter in the
        # TwitterSearch library) -- confirm the intended keyword combination.
        query.set_keywords(cfg['query'], True)

        lang = cfg.get('language', None)
        if lang:
            query.set_language(lang)

        query.set_result_type('recent')
        query.set_count(cfg.get('count', 30))
        query.set_include_entities(False)

        since = self._get_last_id()
        if since:
            query.set_since_id(int(since))

        try:
            result = self._client.search_tweets(query)
            result = result['content']['statuses']
        except Exception as e:
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        # Reverse so tweets are handled in chronological order.
        ordered = list(reversed(result))

        if ordered:
            # Persist the newest id so subsequent polls skip what we've seen.
            self._set_last_id(last_id=ordered[-1]['id'])

        for tweet in ordered:
            self._dispatch_trigger_for_tweet(tweet=tweet)
    def poll(self):
        """Execute the configured Twitter search once and dispatch each
        newly seen tweet in chronological order."""
        order = TwitterSearchOrder()
        order.set_keywords(self._config["query"])

        language = self._config.get("language", None)
        if language:
            order.set_language(language)

        order.set_result_type("recent")
        order.set_count(self._config.get("count", 30))
        order.set_include_entities(False)

        # Continue from the last processed tweet when a marker exists.
        marker = self._get_last_id()
        if marker:
            order.set_since_id(int(marker))

        try:
            payload = self._client.search_tweets(order)
            payload = payload["content"]["statuses"]
        except Exception as e:
            self._logger.exception("Polling Twitter failed: %s" % (str(e)))
            return

        oldest_first = list(reversed(payload))

        if oldest_first:
            # The last element is the newest tweet after reversal.
            self._set_last_id(last_id=oldest_first[-1]["id"])

        for tweet in oldest_first:
            self._dispatch_trigger_for_tweet(tweet=tweet)
    print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'
    
# create a TwitterSearchOrder object
tso = TwitterSearchOrder() 

# let's define all words we would like to have a look for
tso.set_keywords([keyword])

# Select language
tso.set_language('en') 

# Include Entity information
tso.set_include_entities(True)

if db_count is not 0:
    tso.set_since_id(latest_id_int)
    print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
else:
	print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'

    
# Create a TwitterSearch object with our secret tokens
ts = TwitterSearch(
    consumer_key = config.get('Twitter', 'consumer_key'),
    consumer_secret = config.get('Twitter', 'consumer_secret'),
    access_token = config.get('Twitter', 'access_token'),
    access_token_secret = config.get('Twitter', 'access_token_secret')
 )
 
# Perform the search
twitter_search = ts.search_tweets_iterable(tso)
def collect_tweets(keyword, count, force=False):    
    from TwitterSearch import TwitterSearch
    from TwitterSearch import TwitterSearchOrder
    import pymongo
    from dateutil.parser import parse
    from alchemyapi import AlchemyAPI
    import ConfigParser
    
    # try:
    #     keyword = sys.argv[1]
    #     count = int(sys.argv[2])
    # except IndexError:
    # 	e_too_few_args = "You did not enter enough arguments. Two are required: keyword, and count"
    # 	raise Exception(e_too_few_args)
    # try:
    #     if sys.argv[3] == '-f':
    #         force = True
    #     else:
    #         e_invalid_argument = "The only option available is -f. It is used to force the script to continue when the Alchemy API limit is exceeded."
    #         raise Exception(e_invalid_argument)    
    # except IndexError:
    #     force = False
    
    # Read the config file for config variables
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')
    mongo_url = config.get('Mongo', 'db_url')
    
    # Connect to the Mongo database using MongoClient
    
    client = pymongo.MongoClient(mongo_url)
    db = client.get_default_database()
    # Access/create the collection based on the command line argument
    tweets = db[keyword]
    
    #Generate the alchemyapi variable
    alchemyapi = AlchemyAPI()
    
    # To accommodate for hashtags the user can substitute a . for the # in the command line. Lines 30 & 31 return it to a hashtag for the search.
    if keyword[0] is ".":
        keyword = keyword.replace('.', '#')
    
    # Lines 33-42 ensure that the query is not doing duplicate work.
    # First, it counts to see how many documents exist in the collection
    db_count = tweets.count()
    
    # If there are documents in the collection, the collection is queried, tweet objects are sorted by date, and the tweet_id of the most recent tweet is retrieved and later set as the "since_id"
    if db_count is not 0:
        latest_id = tweets.find( {}, { 'object.tweet_id':1 } ).sort("startedAtTime").limit(1)
        latest_id_str = latest_id[db_count-1]['object']['tweet_id']
        latest_id_int = int(latest_id_str)
        print 'Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.'
    # If ther are no documents in the collection, no queries are done, and the since_id is left out of the API call.    
    else:
        print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'
        
    # create a TwitterSearchOrder object
    tso = TwitterSearchOrder() 
    
    # let's define all words we would like to have a look for
    tso.set_keywords([keyword])
    
    # Select language
    tso.set_language('en') 
    
    # Include Entity information
    tso.set_include_entities(True)
    
    if db_count is not 0:
        tso.set_since_id(latest_id_int)
        print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
    else:
    	print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'
    
        
    # Create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key = config.get('Twitter', 'consumer_key'),
        consumer_secret = config.get('Twitter', 'consumer_secret'),
        access_token = config.get('Twitter', 'access_token'),
        access_token_secret = config.get('Twitter', 'access_token_secret')
     )
     
    # Perform the search
    twitter_search = ts.search_tweets_iterable(tso)

    # Start the insert count variable
    db_inserts = 0
    
    # this is where the fun actually starts :)
    try:
        for tweet in twitter_search:
            if db_inserts < count:
                mentions_list = []
                hashtags_list = []
                # Create the caliper_tweet object
                caliper_tweet = {
              "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
              "type": "MessagingEvent",
              "startedAtTime": "",
              ## Can be used to query Twitter API for user information
              "actor": "",
              "verb": "tweetSent",
              "object": {
                "type": "MessagingEvent",
                "tweet_id": "",
                "tweet_uri": "",
                "subtype": "tweet",
                ## "to" should be calculated by checking in_reply_to_user_id_str is null. If it's not null, then it should be concatenated to "uri:twitter/user/" and stored in "object"['to']
                "to": "",
                "author": {
                    "author_uri": "",
                    "author_alias": "",
                    "author_name": "",
                    },
                "text": "",
                "sentiment": {
                    "type": "",
                    "score": "",
                    "color": ""
                },
                "parent": "",
                ## "mentions" is an array of the caliper IDs from the user_mentions objects array
                "user_mentions": [],
                ## "hashtags" is an array of the hashtag texts included in the tweet entities
                "hashtags": []
              }
            }
                
                 # Set the re-usable variables
                tweet_text = tweet['text']
                
                ## AlchemyAPI Sentiment Analysis
                tweet_sentiment = ''
                response = alchemyapi.sentiment('text', tweet_text)
                if 'docSentiment' in response.keys():
                    if 'score' in response['docSentiment']:
                        tweet_sentiment_score = response['docSentiment']['score']
                        tweet_sentiment_score = float(tweet_sentiment_score)
                        tweet_sentiment_score = round(tweet_sentiment_score, 2)
                    else:
                        tweet_sentiment_score = 0
                    tweet_sentiment_type = response['docSentiment']['type']
                    tweet_sentiment_score_a = abs(tweet_sentiment_score)
                    if (tweet_sentiment_score) > 0:
                        tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")"
                    else: 
                        tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")"
                elif force == True:
                    print 'Force option set to true. The tweet_sentiment object will be set with API Limit Exceeded values.'
                    tweet_sentiment_type = 'API Limit Exceeded'
                    tweet_sentiment_score = 0
                    tweet_sentiment_color = 'rgba(0,0,0,0)'
                else:
                    e_alchemy_api_limit = 'Alchemy API daily limit exceeded. Retry search with force=True to continue'
                    raise Exception(e_alchemy_api_limit)
                    
            
                ds = tweet['created_at']
                tweet_date = parse(ds)
                caliper_tweet['startedAtTime'] = tweet_date
                caliper_tweet['actor'] = 'student:' + tweet['user']['screen_name']
                caliper_tweet['object']['tweet_uri'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['id_str']
                caliper_tweet['object']['tweet_id'] = tweet['id_str']
                if tweet['in_reply_to_user_id_str'] is None:
                    caliper_tweet['object']['to'] = 'NoReply'
                    caliper_tweet['object']['parent'] = 'NoReply'
                else:
                    caliper_tweet['object']['to'] = 'https://twitter.com/intent/user?user_id=' + tweet['in_reply_to_user_id_str']
                    if tweet['in_reply_to_status_id_str'] is None:
                        caliper_tweet['object']['parent'] = 'None'
                    else:    
                        caliper_tweet['object']['parent'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['in_reply_to_status_id_str']
                caliper_tweet['object']['author']['author_uri'] = 'https://twitter.com/intent/user?user_id=' + tweet['user']['id_str']
                caliper_tweet['object']['author']['author_alias'] = tweet['user']['screen_name']
                caliper_tweet['object']['author']['author_name'] = tweet['user']['name']
                caliper_tweet['object']['text'] = unicode(tweet['text'])
                caliper_tweet['object']['sentiment']['type'] = tweet_sentiment_type
                caliper_tweet['object']['sentiment']['score'] = tweet_sentiment_score
                caliper_tweet['object']['sentiment']['color'] = tweet_sentiment_color
                for x in list(tweet['entities']['hashtags']):
                    hashtag = x['text']
                    hashtags_list.append(hashtag)
                for x in list(tweet['entities']['user_mentions']):
                    mention = x['id_str']
                    mentions_list.append(mention)
                caliper_tweet['object']['user_mentions'] = mentions_list
                caliper_tweet['object']['hashtags'] = hashtags_list
             
                tweets.insert(caliper_tweet)
                
                db_inserts = db_inserts + 1
                
            else:
                raise StopIteration
    except StopIteration:
        print str(db_inserts) + " inserts made in the " + keyword + " collection."