def get_new_tweets(self, keywords: list) -> None:
    """Fetch tweets matching ``keywords`` via the TwitterSearch lib.

    Collected tweets are handed to ``self._store`` to update the database;
    API failures are routed through ``self.report_error``.
    """
    collected = []
    if self.DEBUG:
        print("Searching for tweets with {} as keywords.".format(keywords))  # DEBUG
    try:
        order = TwitterSearchOrder()
        order.setKeywords(keywords)
        order.setLanguage('en')
        order.setCount(1)
        order.setIncludeEntities(False)
        search = TwitterSearch(
            consumer_key='YOUR STUFF HERE',
            consumer_secret='YOUR STUFF HERE',
            access_token='YOUR STUFF HERE',
            access_token_secret='YOUR STUFF HERE'
        )
        # OAuth sign-in is required before any search request.
        search.authenticate()
        collected.extend(search.searchTweetsIterable(order))
    except TwitterSearchException as e:
        self.report_error(["TwitterSearchException", e])
    if self.DEBUG:
        print("Fetched {} new tweets with {} as keywords.".format(len(collected), keywords))  # DEBUG
    self._store(collected, keywords)
def fetch_tweets(search_request):
    """Fetch tweets for ``search_request``, extract urls and update the db.

    :param search_request: keyword string to search Twitter for
    :return: the list of extracted urls, or None when the API call fails
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([search_request])  # define search request
        tso.setCount(settings.tweets_per_page)  # only results_per_page
        tso.setIncludeEntities(True)  # entity information carries the urls
        # Create a TwitterSearch object with our secret tokens.
        ts = TwitterSearch(
            consumer_key=twitter.TWITTER_CONSUMER_KEY,
            consumer_secret=twitter.TWITTER_CONSUMER_SECRET,
            access_token=twitter.TWITTER_ACCESS_TOKEN,
            access_token_secret=twitter.TWITTER_ACCESS_TOKEN_SECRET
        )
        ts.authenticate()  # user must authenticate first
        tweets = ts.searchTweetsIterable(tso)
        found_urls = extract_urls(tweets)
        search_keyword_object = SearchKeyWord()
        search_keyword_object.gifs = found_urls
        search_keyword_object.search_keyword = search_request
        search_keyword_object.updated_at = datetime.now()
        print(search_keyword_object)
        search_keyword_object.save()
        return found_urls
    except TwitterSearchException as e:
        # BUG FIX: ``except TwitterSearchException, e`` is Python-2-only
        # syntax, and the error was silently swallowed. Surface it and
        # return None explicitly (the original fell off the end).
        print("TwitterSearchException: {}".format(e))
        return None
def fetch_tweets(search_request):
    """Fetch tweets for ``search_request``, extract urls and update the db.

    :param search_request: keyword string to search Twitter for
    :return: the list of extracted urls, or None when the API call fails
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([search_request])  # define search request
        tso.setCount(settings.tweets_per_page)  # only results_per_page
        tso.setIncludeEntities(True)  # entity information carries the urls
        # Create a TwitterSearch object with our secret tokens.
        ts = TwitterSearch(
            consumer_key=twitter.TWITTER_CONSUMER_KEY,
            consumer_secret=twitter.TWITTER_CONSUMER_SECRET,
            access_token=twitter.TWITTER_ACCESS_TOKEN,
            access_token_secret=twitter.TWITTER_ACCESS_TOKEN_SECRET)
        ts.authenticate()  # user must authenticate first
        tweets = ts.searchTweetsIterable(tso)
        found_urls = extract_urls(tweets)
        search_keyword_object = SearchKeyWord()
        search_keyword_object.gifs = found_urls
        search_keyword_object.search_keyword = search_request
        search_keyword_object.updated_at = datetime.now()
        print(search_keyword_object)
        search_keyword_object.save()
        return found_urls
    except TwitterSearchException as e:
        # BUG FIX: ``except TwitterSearchException, e`` is Python-2-only
        # syntax, and the error was silently swallowed. Surface it and
        # return None explicitly (the original fell off the end).
        print("TwitterSearchException: {}".format(e))
        return None
def getTweetsForKeyword(keyword, last_id=None):
    """
    Get the (recent) tweets for a given keyword
    :param keyword: the query keyword
    :param last_id: only return tweets newer than this tweet id (optional)
    :return: a list of tweets. List is empty if an error occurs
    """
    tweet_list = []
    try:
        print('*** Searching tweets for keyword: {}  ...'.format(keyword))
        tso = TwitterSearchOrder()
        tso.setKeywords([keyword])
        tso.setLanguage('en')
        tso.setResultType('recent')
        tso.setCount(100)  # maximum page size allowed by the search API
        tso.setIncludeEntities(True)
        if last_id is not None:
            tso.setSinceID(last_id)
        ts = TwitterSearch(
            consumer_key=params.CONSUMER_KEY,
            consumer_secret=params.CONSUMER_SECRET,
            access_token=params.ACCESS_TOKEN,
            access_token_secret=params.ACCESS_TOKEN_SECRET
        )
        ts.authenticate()  # OAuth sign-in before searching
        counter = 0
        for tweet in ts.searchTweetsIterable(tso):
            counter += 1
            tweet_list.append(tweet)
        print('*** Found a total of %i tweets for keyword: %s' % (counter, keyword))
        return tweet_list
    except TwitterSearchException as e:
        # BUG FIX: ``except X, e`` and ``e.message`` are Python-2-only;
        # use the portable forms (the rest of this file uses print()).
        print('[ERROR] {}'.format(e))
        return tweet_list
# Fetch tweets mentioning @vooruit and dump them to a semicolon-separated
# text file (user;tweet;image).
# NOTE(review): this snippet is truncated in this chunk — the matching
# ``except`` clauses for both ``try`` blocks and the ``f.close()`` are not
# visible here; confirm against the full file.
try:
    tso = TwitterSearchOrder()  # build the search query
    tso.setKeywords(['@vooruit'])  # the words we would like to look for
    #tso.setLanguage('nl')  # optionally restrict to Dutch tweets
    tso.setCount(1)  # one result per page
    tso.setIncludeEntities(False)  # entity information is not needed

    # Create a TwitterSearch object with our secret tokens.
    ts = TwitterSearch(
        consumer_key,
        consumer_secret,
        access_token,
        access_token_secret
    )
    ts.authenticate()  # OAuth authentication is required before searching

    counter = 0  # number of tweets processed

    # "w" mode truncates: this always overwrites any existing tweets.txt.
    f = open("/mnt/GT3/GT2 Projects/tweets.txt", "w")
    f.write("user;tweet;image\n")  # header row

    for tweet in ts.searchTweetsIterable(tso):
        #print '@%s tweeted: %s' % (unescape(tweet['user']['screen_name']), unescape(tweet['text']))
        try:
            # Screen name, HTML-unescaped and encoded defensively.
            f.write(unescape(tweet['user']['screen_name'].encode('utf-8', 'ignore')))
            f.write(": ;")
def Get_Data(self): MAX_PAGES = 15 RESULTS_PER_PAGE = 100 tso = TwitterSearchOrder() # create a TwitterSearchOrder object tso.setKeywords(search_keywords) # let's define all words we would like to have a look for tso.setLanguage('en') # we want to see German tweets only tso.setCount(RESULTS_PER_PAGE) # please dear Mr Twitter, only give us 1 results per page tso.setIncludeEntities(False) # and don't give us all those entity information # it's about time to create a TwitterSearch object with our secret tokens ts = TwitterSearch( consumer_key = 'enter your key', consumer_secret = 'enter your secret', access_token = '81498230-z', access_token_secret = 'z' ) ts.authenticate() # we need to use the oauth authentication first to be able to sign messages i=0 no_users = 0 no_tweets = 0 search_results = [] twitter_dict = dict() #Retweet pattern to detect retweets rt_patterns = re.compile(r"(RT|via)((?:\b\W*@\w+)+)", re.IGNORECASE) rt_origins_list = [] rt_origins = '' db = Create_Couchdb_Instance(database_name) while(True): tso.setKeywords([search_keywords[i]]) for tweet in ts.searchTweetsIterable(tso): # this is where the fun actually starts :) try: user_id = tweet['user']['id_str'] tweet_text = tweet['text'] time_stmp = tweet['created_at'] try: if 'media' in tweet['entities']: if len(tweet['entities']['media'])>0: im_id=tweet['entities']['media'][0]['id'] im_url=tweet['entities']['media'][0]['expanded_url'] if 'urls' in tweet['entities']: if len(tweet['entities']['urls'])>0: urls=tweet['entities']['urls'] if 'user_mentions' in tweet['entities']: if len(tweet['entities']['user_mentions'])>0: mentions=tweet['entities']['user_mentions'] except: im_id='' im_url='' urls='' mentions='' # If screen name is not present , add all the details if user_id not in db: twitter_dict=dict() twitter_dict['_id'] = user_id 
twitter_dict['text']=[{'urls':urls,'image':[im_id,im_url],'mentions':mentions,'tweet_id':tweet['id'],'text':tweet_text,'timestamp':time_stmp,'coordinates':tweet['coordinates'],'source':tweet['source'],'in_reply_to_screen_name':tweet['in_reply_to_screen_name'],'retweet_count':tweet['retweet_count']}] twitter_dict['friends_count'] = tweet['user']['friends_count'] twitter_dict['location'] = tweet['user']['location'] twitter_dict['profile_description'] = tweet['user']['description'] twitter_dict['tweet_count'] = tweet['user']['statuses_count'] twitter_dict['followers_count'] = tweet['user']['followers_count'] twitter_dict['screen_name'] = tweet['user']['screen_name'] twitter_dict['profile_created_at'] = tweet['user']['created_at'] no_users += 1 no_tweets += 1 db.create(twitter_dict) # If screen name is present then check if the tweet is present otherwise append the tweet details else: doc = db[user_id] tweets = {} for tweet_dict in doc['text']: tweets[tweet_dict['text']] = 'tweet' if tweet_text not in tweets: doc['text'].append({'urls':urls,'image':[im_id,im_url],'mentions':mentions,'tweet_id':tweet['id'],'text':tweet_text,'timestamp':time_stmp,'coordinates':tweet['coordinates'],'source':tweet['source'],'in_reply_to_screen_name':tweet['in_reply_to_screen_name'],'retweet_count':tweet['retweet_count']}) no_tweets += 1 db[user_id] = doc print 'No of users {}, No. of Tweets: {}, Time: {}'.format(no_users,no_tweets,strftime("%Y-%m-%d %H:%M:%S")) write_log(no_users,no_tweets) except Exception as e: print 'Exception: {}'.format(r) i = i+1 # In the new API version 1.1 , the rate limits are fixed per 15 mins instead of per hour if i == len(search_keywords): i=0 print "Sleeping for 15 mins" time.sleep(900)
try: tso = TwitterSearchOrder() # create a TwitterSearchOrder object tso.setKeywords([ 'FCX', 'Freeport McMoran' ]) # let's define all words we would like to have a look for tso.setLanguage('en') # we want to see German tweets only tso.setCount(1) # please dear Mr Twitter, only give us 1 results per page tso.setIncludeEntities( False) # and don't give us all those entity information # it's about time to create a TwitterSearch object with our secret tokens ts = TwitterSearch( consumer_key='ug5WVZqqCD9vY8gY9SVCQ', consumer_secret='pr1yfm0oTNbGtZnL1bd5R3Ybl4fr9NteDgDnyjpwaA', access_token='160739524-e0k14cLQGqaQalNKWOhp6QApGGKMYKqJMzZwzohA', access_token_secret='Ml2mEkEr5FSd8ymcEZrQh5gtwWeQSkvtXekK7GZHQR4') ts.authenticate( ) # we need to use the oauth authentication first to be able to sign messages counter = 0 # just a small counter for tweet in ts.searchTweetsIterable( tso): # this is where the fun actually starts :) counter += 1 print '@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['created_at']) #, tweet['date']) print '*** Found a total of %i tweets' % counter except TwitterSearchException, e: # take care of all those ugly errors if there are some print e.message
def viewtwitterevents(request):
    '''
    Query the Twitter API for a given hashtag and date range, using these
    Twitter-Python bindings: https://github.com/ckoepp/TwitterSearch

    GET parameters:
      hashtag      -- required search term.
      date_start   -- optional "YYYY-MM-DD" lower bound for tweets.
      date_end     -- optional "YYYY-MM-DD" upper bound for tweets.
      no_tweet_cap -- "true" disables the TWITTER_SEARCH_API_TWEET_CAP limit.

    Returns a JSON HttpResponse with the matching events. Results are cached
    in the Django cache for 12 hours and mirrored to a secondary file cache.
    On any error an ApiResponse-wrapped JSON error body is returned.

    NOTE(review): uses Python-2-only names (``sys.maxint``, ``long``) and
    Django's removed ``mimetype`` kwarg — will not run unmodified on
    Python 3 / modern Django.
    '''
    try:
        # Hashtag is required.
        hashtag = None
        query = None
        if 'hashtag' in request.GET and len(request.GET['hashtag']) > 0:
            hashtag = __sanitize_input(request.GET['hashtag'])
        else:
            raise Exception('Hashtag is required!')

        # If datetime range is supplied, use that, else return all events.
        dtStart = dtEnd = None
        strStart = strEnd = None
        if 'date_start' in request.GET and request.GET['date_start']:
            strStart = request.GET['date_start']
            dtStart = datetime.datetime.strptime(strStart, "%Y-%m-%d").date()
        if 'date_end' in request.GET and request.GET['date_end']:
            strEnd = request.GET['date_end']
            dtEnd = datetime.datetime.strptime(strEnd, "%Y-%m-%d").date()

        # Check for the "no tweet cap" param.
        noTweetCap = False
        if 'no_tweet_cap' in request.GET and request.GET['no_tweet_cap'] == 'true':
            noTweetCap = True

        # First, check the cache for the Twitter API result.
        # NOTE(review): if either date param is omitted, strStart/strEnd stay
        # None and this concatenation raises TypeError — so the "all events"
        # path promised above can never actually be reached. TODO confirm.
        cacheKey = hashtag + '_' + strStart + '_' + strEnd
        secondaryCacheFilePath = settings.SECONDARY_CACHE_DIRECTORY + cacheKey + '.json'
        response = cache.get(cacheKey)
        if response == None:
            # Fall back to the on-disk secondary cache.
            if os.path.isfile(secondaryCacheFilePath):
                with open(secondaryCacheFilePath, "r") as textFile:
                    response = json.load(textFile)
        if response == None or len(response) == 0:
            totalEventCnt = 0        # tweets collected overall
            totalEventCntThresh = 0  # progress-print counter (resets at 1000)
            events = []
            tsMax = 0                # newest tweet timestamp seen
            tsMin = sys.maxint       # oldest tweet timestamp seen (Py2-only)

            # Authenticate with Twitter API.
            tso = TwitterSearchOrder()
            tso.setLanguage('en')
            tso.setCount(100)
            tso.setIncludeEntities(False)
            tso.setResultType('recent')

            # Create a TwitterSearch object with our secret tokens.
            twitterSearch = TwitterSearch(
                consumer_key=settings.TWITTER_CONSUMER_KEY,
                consumer_secret=settings.TWITTER_CONSUMER_SECRET,
                access_token=settings.TWITTER_ACCESS_TOKEN,
                access_token_secret=settings.TWITTER_ACCESS_TOKEN_SECRET
            )
            twitterSearch.authenticate()

            # Construct and run the twitter search query.
            # NOTE(review): both branches set query = hashtag; only the
            # setUntil() call actually differs.
            if dtStart != None and dtEnd != None:
                query = hashtag
                tso.setUntil(dtEnd)
            else:
                query = hashtag
            tso.setKeywords([query])
            maxId = 0
            tweetCnt = MAX_INT32
            doLoop = True

            # Page through the Twitter search API results until we either
            # get no results or we arrive at the start date.
            while (doLoop):
                # Exit conditions.
                # NOTE(review): unreachable — the while test just passed.
                if not doLoop:
                    break;
                if tweetCnt == 0:
                    break
                if maxId > 0:
                    # Continue paging backwards from the last tweet seen.
                    tso.setMaxID(maxId)
                    tso.setKeywords([hashtag])

                # Reset counter.
                tweetCnt = 0
                # Reset last tweet.
                lastTweet = None

                # Create an additional retry loop for when Twitter refuses
                # the next page.
                try:
                    for tweet in twitterSearch.searchTweetsIterable(tso):
                        dt = __getDateFromTweetCreatedAt(tweet['created_at'])
                        if dt.date() < dtStart:
                            # Paged back past the requested start date: stop.
                            doLoop = False
                            break;
                        ts = time.mktime(dt.timetuple())
                        if ts > tsMax:
                            tsMax = ts
                        if ts < tsMin:
                            tsMin = ts
                        lastTweet = tweet
                        # Copy search results to the Event list.
                        events.append(
                            {
                                'event_key': hashtag,
                                'event_datetime': str(tweet['created_at']),
                                'event_timestamp': ts,
                                'event_value': tweet['text'],
                                'event_tags': [hashtag],
                                'raw_data': tweet
                            }
                        )
                        # Increment counter.
                        tweetCnt += 1
                        totalEventCnt += 1
                        totalEventCntThresh += 1
                        if totalEventCntThresh >= 1000:
                            print('Processed ' + str(totalEventCnt) + ' tweets.')
                            totalEventCntThresh = 0
                        # Exit conditions:
                        if not noTweetCap and totalEventCnt >= settings.TWITTER_SEARCH_API_TWEET_CAP:
                            doLoop = False
                            break
                except Exception as ex:
                    # Wait and then try last request again (typically a rate
                    # limit refusal from the search API).
                    sleepDurationSeconds = 900  # 15 minutes.
                    print("Got exception when querying Twitter search API: " + ex.message)
                    # Save the portion of the events JSON collected so far.
                    with open(settings.SECONDARY_CACHE_DIRECTORY + cacheKey + '-part-' + str(totalEventCnt) + '.json', "w") as textFile:
                        textFile.write(json.dumps(events, default=json_util.default))
                    print("Sleeping for " + str(sleepDurationSeconds) + " seconds.")
                    time.sleep(sleepDurationSeconds)
                    # Reset the tweet counter to make sure we don't
                    # artificially trigger the loop exit condition.
                    tweetCnt = -1
                    print("Time to wake up and try again from maxId = " + str(maxId))
                if lastTweet != None:
                    maxId = long(lastTweet['id_str'])  # Py2-only ``long``

            # Return the file list as JSON.
            response = {
                'heartbeat_events': events,
                'timestamp_max': tsMax,
                'timestamp_min': tsMin,
                'allowed_event_keys': [hashtag]
            };
            # Now cache response.
            cache.set(cacheKey, response, 43200)  # 12 hours
            # Finally, store the events in a text file (TODO: I may remove this later).
            with open(secondaryCacheFilePath, "w") as textFile:
                textFile.write(json.dumps(response, default=json_util.default))
        ser = json.dumps(response, default=json_util.default)
        return HttpResponse(ser, mimetype="application/json")
    except Exception as ex:
        # Respond with error as JSON.
        return HttpResponse(ApiResponse.from_exception(ex).to_json(), mimetype="application/json")