def coleta_tweets():
    """Collect Portuguese tweets matching 'Harry potter' and return them as a DataFrame.

    Returns None (after printing the error) when the Twitter search fails.
    """
    try:
        client = TwitterSearch(consumer_key='',
                               consumer_secret='',
                               access_token='',
                               access_token_secret='')
        order = TwitterSearchOrder()
        order.set_keywords(['Harry potter'])
        order.set_language('pt')
        collected = [
            '@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']) + ','
            for tweet in client.search_tweets_iterable(order)
        ]
        print('Coleta finalizada!')
        return pd.DataFrame(collected)
    except TwitterSearchException as e:
        print(e)
def setup(self):
    """Create the TwitterSearch client from the sensor's configured credentials.

    Reads the four OAuth values from self._config and resets the last-seen
    tweet id so the next poll starts fresh.
    """
    self._client = TwitterSearch(
        consumer_key=self._config['consumer_key'],
        consumer_secret=self._config['consumer_secret'],
        access_token=self._config['access_token'],
        access_token_secret=self._config['access_token_secret'])
    # No tweet processed yet; presumably used as a since_id marker by the
    # poll loop -- confirm against the caller.
    self._last_id = None
def coleta_tweets():
    """Search Twitter for 'Harry potter' tweets in Portuguese; return a DataFrame."""
    try:
        busca = TwitterSearch(
            consumer_key='',
            consumer_secret='',
            access_token='',
            access_token_secret='',
        )
        pedido = TwitterSearchOrder()
        pedido.set_keywords(['Harry potter'])
        pedido.set_language('pt')
        linhas = []
        for tweet in busca.search_tweets_iterable(pedido):
            linha = '@%s tweeted: %s' % (tweet['user']['screen_name'],
                                         tweet['text'])
            linhas.append(linha + ',')
        print('Coleta finalizada!')
        return pd.DataFrame(linhas)
    except TwitterSearchException as e:
        print(e)
def SearchOnTwitter(keywords, language):
    """ Allows to test twitter search library -> collect tweets of interest.

    Parameters:
        - keywords : string array that tweets must contain
        - language : string indicating the language of the interest tweets
    Return :
        - array of tweet texts (empty when the search fails)
    """
    collected = []
    try:
        order = TwitterSearchOrder()
        order.set_keywords(keywords)
        order.set_language(language)
        order.set_include_entities(False)  # skip entity metadata
        # Authenticate with the module-level credentials.
        client = TwitterSearch(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)
        for tweet in client.search_tweets_iterable(order):
            collected.append(tweet['text'])
    except TwitterSearchException as e:
        # Report search/auth failures and fall through to the (partial) result.
        print(e)
    return collected
def count_for_tag(support, hashtag):
    """Tally, into *support*, the country of each tweet mentioning *hashtag*.

    Stops after roughly 1000 tweets.  The country comes from the tweet's
    structured `place` when present; otherwise it is guessed by scanning the
    free-text user location against the module-level `country_desc` table.

    Modernized to run under both Python 2 and 3: `print` statement -> call,
    `has_key` -> `in`, `iteritems` -> `items`.
    """
    # SECURITY(review): OAuth credentials are hard-coded below; they are
    # effectively public and should be rotated and loaded from configuration.
    print("Searching for %s " % hashtag)
    try:
        tso = TwitterSearchOrder()
        tso.setKeywords([hashtag])
        # tso.setLanguage('en')
        tso.setCount(100)  # 100 results per page
        tso.setIncludeEntities(False)  # entity info is not needed
        ts = TwitterSearch(
            consumer_key="TNX9jTHJgTEyB1IcUECPJ4uSY",
            consumer_secret="5B1R0geyT1Iv2mBc601gaDwuBBjVkabab72UXbzVTDEJ7Z6XAb",
            access_token="143109809-5IAGEaGuuiBRjVVJT9WHUQnAQlOkVcemzhnOpMkx",
            access_token_secret="Yh5WeJo9Z01j42jbTk6tL47zl1Rdox1LJ1d2lJgAAPm0r",
            verify=False,
        )
        for tweet in ts.searchTweetsIterable(tso):
            # Prefer the structured place information when Twitter supplies it.
            place = tweet["place"]
            if place is not None and "country" in place:
                __increment(support, place["country"])
                continue
            location = tweet["user"]["location"]
            if not location:  # also tolerates a null location
                continue
            # Fall back to scanning the free-form location string against the
            # country names and their descriptive aliases.
            country = None
            lowered = location.lower()
            for cn, cd in country_desc.items():
                if cn.lower() in lowered:
                    country = cn
                    break
                for desc_part in cd:
                    # Whole-word match of the alias inside the location text.
                    if re.search(r"\b%s\b" % desc_part, location):
                        country = cn
                        break
                if country is not None:
                    break
            if country is not None:
                __increment(support, country)
            if ts.getStatistics()["tweets"] > 1000:
                break
    except TwitterSearchException as e:
        print(e)
def collect_tweets_from_city(arg):
    """Collect geocoded tweets for a known city and store one row per tweet
    in the city's MySQL table (<city>Table), which is dropped and recreated.

    arg -- a key of the module-level `location` dict (case-sensitive); its
           geocode scopes the Twitter search.

    Raises:
        KeyError -- when *arg* is not a known location.
    """
    if arg in location.keys():
        city = arg
    else:
        raise KeyError(
            "[WARNING CASE-SENSITIVE] %s location geocode are not known, available locations %s"
            % (arg, location.keys()))
    try:
        tso = TwitterSearchOrder()
        tso.setCount(100)
        tso.setIncludeEntities(True)
        tso.setResultType('mixed')
        tso.setGeocode(**location[city])
        ts = TwitterSearch(**twitter_auth1)
        conn = MySQLdb.connect(**mysql_auth)
        curr = conn.cursor()
        with conn:
            # BUGFIX: DROP/CREATE previously interpolated `self.table`, which
            # does not exist in this plain function (guaranteed NameError) and
            # disagreed with the INSERT below, which targeted <city>Table.
            # All three statements now consistently use the city name.
            curr.execute("DROP TABLE IF EXISTS %sTable" % city)
            print('table dropped')
            curr.execute(
                "CREATE TABLE IF NOT EXISTS %sTable (Id INT PRIMARY KEY AUTO_INCREMENT,lat DECIMAL(7,5),lon DECIMAL(8,5),place VARCHAR(200),created_at VARCHAR(40),hashtags VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,urls VARCHAR(160) CHARACTER SET utf8 COLLATE utf8_general_ci,user_mentions VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,media VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,favorite_count INT,filter_level VARCHAR(10),tid BIGINT,in_reply_to_screen_name VARCHAR(20) CHARACTER SET utf8 COLLATE utf8_general_ci,in_reply_to_status_id BIGINT,in_reply_to_user_id BIGINT,retweet_count INT,source VARCHAR(200) CHARACTER SET utf8 COLLATE utf8_general_ci,text VARCHAR(160) CHARACTER SET utf8 COLLATE utf8_general_ci,user_id BIGINT,screen_name VARCHAR(100) CHARACTER SET utf8 COLLATE utf8_general_ci,user_location VARCHAR(40) CHARACTER SET utf8 COLLATE utf8_general_ci,retweeted_status_id BIGINT)"
                % city)
            for tweet in ts.searchTweetsIterable(tso):
                if tweet['coordinates'] != None:
                    lat = float(tweet['coordinates']['coordinates'][1])
                    lon = float(tweet['coordinates']['coordinates'][0])
                else:
                    lat = 0
                    lon = 0
                # Robustness: `place` is null on tweets without a tagged place.
                place = tweet['place']['full_name'] if tweet['place'] else ''
                created_at = tweet['created_at']
                hashtags = "%20".join(
                    [item['text'] for item in tweet['entities']['hashtags']])
                urls = "%20".join(
                    [item['url'] for item in tweet['entities']['urls']])
                user_mentions = "%20".join([
                    item['id_str'] + "%40" + item["screen_name"]
                    for item in tweet['entities']['user_mentions']
                ])
                media = "%20".join([
                    item['id_str'] + "%40" + item["media_url"]
                    for item in tweet['entities']['media']
                ]) if 'media' in tweet['entities'].keys() else ''
                favorite_count = tweet["favorite_count"] if tweet["favorite_count"] != None else 0
                filter_level = tweet["filter_level"] if 'filter_level' in tweet.keys() else ''
                tid = tweet['id']
                in_reply_to_screen_name = tweet["in_reply_to_screen_name"] if tweet["in_reply_to_screen_name"] != None else 0
                in_reply_to_status_id = tweet["in_reply_to_status_id"] if tweet["in_reply_to_status_id"] != None else 0
                in_reply_to_user_id = tweet["in_reply_to_user_id"] if tweet["in_reply_to_user_id"] != None else 0
                retweet_count = tweet["retweet_count"] if tweet["retweet_count"] != None else 0
                # SECURITY: values below are bound as driver parameters
                # instead of being %-interpolated into the SQL text, which
                # allowed SQL injection via tweet content (and required the
                # old manual quote escaping, now unnecessary).  Only the table
                # name -- which cannot be a bound parameter and comes from the
                # trusted `location` keys -- is concatenated.
                source = tweet["source"]
                text = tweet["text"]
                user_id = tweet["user"]["id"]
                screen_name = tweet["user"]["screen_name"]
                user_location = tweet["user"]["location"]
                retweeted_status_id = tweet["retweeted_status"]["id"] if "retweeted_status" in tweet.keys() else 0
                placeholders = ",".join(["%s"] * 21)
                query = ("INSERT INTO " + city +
                         "Table(lat,lon,place,created_at,hashtags,urls,user_mentions,media,favorite_count,filter_level,tid,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_user_id,retweet_count,source,text,user_id,screen_name,user_location,retweeted_status_id) VALUES ("
                         + placeholders + ")")
                curr.execute(query,
                             (lat, lon, place, created_at, hashtags, urls,
                              user_mentions, media, favorite_count,
                              filter_level, tid, in_reply_to_screen_name,
                              in_reply_to_status_id, in_reply_to_user_id,
                              retweet_count, source, text, user_id,
                              screen_name, user_location,
                              retweeted_status_id))
    except TwitterSearchException as e:
        print(e)
def __init__(self):
    # Login to twitter handle using oauth
    # SECURITY(review): OAuth credentials are hard-coded here; they are
    # effectively public and should be rotated and loaded from configuration.
    self.twitter = TwitterSearch(
        consumer_key='PYX15cyo7pBYyrny2kXomGf4N',
        consumer_secret='mCMtxofBFLtJv1GVRXeB9w0pw64ObRDPGmIZEGRo3uyl1oPVci',
        access_token='3369817647-TTV9HTaWAIbvrbpJwgXkVQtm0akEMSihl43No3P',
        access_token_secret='WjxjNW8YWmRSL65eIYYhQd4DsBSECO7wKpZpKsfLcR99q'
    )
def __init__(self, search_query):
    """Build the Twitter client from SEARCHTWEETS_* env vars and start iterating
    over the tweets matched by *search_query*."""
    self.search_query = search_query
    env = os.getenv
    self.library = TwitterSearch(
        consumer_key=env("SEARCHTWEETS_CONSUMER_KEY"),
        consumer_secret=env("SEARCHTWEETS_CONSUMER_SECRET"),
        access_token=env("SEARCHTWEETS_ACCESS_TOKEN"),
        access_token_secret=env("SEARCHTWEETS_ACCESS_TOKEN_SECRET"),
    )
    order = self.search_query.create_twitter_search_order()
    self.iter = iter(self.library.search_tweets_iterable(order))
def __init__(self, movie, limit=10, language='en'):
    """Store search settings and build the Twitter client from TWITTER_* env vars.

    Parameters:
        movie -- movie name used for the search
        limit -- maximum number of results to keep (default 10)
        language -- tweet language filter (default 'en')
    """
    self.movie = movie
    # BUGFIX: was hard-coded `self.limit = 10`, silently ignoring the
    # caller-supplied `limit` argument.
    self.limit = limit
    self.language = language
    self.client = TwitterSearch(
        consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
        consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
        access_token=os.environ.get('TWITTER_ACCESS_TOKEN'),
        access_token_secret=os.environ.get('TWITTER_TOKEN_SECRET')
    )
def __init__(self):
    """Prepare a reusable search order (blank keyword, 100 results, no
    entities) and the authenticated API client."""
    order = TwitterSearchOrder()
    order.set_keywords([''])
    order.set_count(100)
    order.set_include_entities(False)
    self.properties = order
    creds = DATASOURCES['twitter']
    self.api = TwitterSearch(
        consumer_key=creds['api_key'],
        consumer_secret=creds['api_secret'],
        access_token=creds['access_token'],
        access_token_secret=creds['access_token_secret'])
def setup(self):
    """Create the TwitterSearch client and validate the sensor config.

    Raises:
        ValueError -- when the configured "query" is not a list.
    """
    self._client = TwitterSearch(
        consumer_key=self._config['consumer_key'],
        consumer_secret=self._config['consumer_secret'],
        access_token=self._config['access_token'],
        access_token_secret=self._config['access_token_secret'])
    self._last_id = None
    # isinstance is the idiomatic type check (and accepts list subclasses).
    if not isinstance(self._config['query'], list):
        # BUGFIX: the old message used a backslash line-continuation inside
        # the string literal, embedding a long run of spaces in the log line.
        self._logger.exception(
            'Twitter sensor failed. "query" config value is not a list')
        raise ValueError('[TwitterSearchSensor]: "query" is not a list')
def main(word):
    """Search tweets for *word*, round-trip translate them (ja->en->ja), and
    render the resulting word frequencies as an HTML word cloud."""
    searcher = TwitterSearch(word)
    tweets = searcher.search_start(amount=10)
    del searcher
    words = []
    print('単語リスト作成中')  # "building the word list"
    for tweet in tweets[:20]:
        words.extend(ja_en_ja(tweet))
    counts = Counter(words)
    print('htmlファイル作成')  # "creating the html file"
    twodata_into_wordcloud(list(counts.keys()), list(counts.values()),
                           minimamnum=0)
def get_tweets(query):
    """Return up to query['count'] (default 5) tweets matching query['query'],
    using the Flask app's TWITTER_* config for authentication."""
    from TwitterSearch import TwitterSearch, TwitterSearchOrder
    import itertools

    order = TwitterSearchOrder()
    order.set_keywords(query.get('query', '').split(' '))
    # order.set_language('en')
    order.set_include_entities(False)
    client = TwitterSearch(
        consumer_key=app.config.get('TWITTER_CONSUMER_KEY'),
        consumer_secret=app.config.get('TWITTER_CONSUMER_SECRET'),
        access_token=app.config.get('TWITTER_ACCESS_TOKEN'),
        access_token_secret=app.config.get('TWITTER_ACCESS_TOKEN_SECRET')
    )
    limit = int(query.get('count', 5))
    return list(itertools.islice(client.search_tweets_iterable(order), 0, limit))
def getTweetsByWords(authdata, word, limit=100):
    """Collect up to *limit* tweet texts containing *word*.

    authdata -- dict holding the four OAuth credential keys.
    Returns a status dict with the collected texts under 'result'.
    """
    order = TwitterSearchOrder()
    order.set_keywords([word])
    order.set_include_entities(False)
    client = TwitterSearch(consumer_key=authdata['consumer_key'],
                           consumer_secret=authdata['consumer_secret'],
                           access_token=authdata['access_token'],
                           access_token_secret=authdata['access_token_secret'])
    texts = []
    for index, tweet in enumerate(client.search_tweets_iterable(order)):
        if index == limit:
            break
        texts.append(tweet['text'])
        print(index)  # progress counter, kept from the original
    return {'status': 'Task Completed', 'result': texts}
def count_tweets_of_app(app_name):
    """ Counts how many tweets are with the hashtag app_name and COMPETITION_NAME
        from diferent users

        Args:
            app_name: name of the app of whose tweets are to be counted
        Returns:
            num of votes (tweets), or -1 when the Twitter search fails
    """
    from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
    try:
        order = TwitterSearchOrder()
        order.set_keywords([check_hashtag(app_name), COMPETITION_NAME])
        client = TwitterSearch(consumer_key=TWITTER_API_KEY,
                               consumer_secret=TWITTER_API_KEY_SECRET,
                               access_token=TWITTER_ACCESS_TOKEN,
                               access_token_secret=TWITTER_ACCESS_TOKEN_SECRET)
        voters = set()
        for tweet in client.search_tweets_iterable(order):
            user = tweet['user']['id']
            if user in voters:
                continue  # one vote per user
            # created_at format per the Twitter API, e.g. "Mon Sep 24 03:35:21 +0000 2012"
            posted = datetime.datetime.strptime(tweet['created_at'],
                                                '%a %b %d %H:%M:%S +0000 %Y')
            if COMPETITION_START_DATE < posted < COMPETITION_END_DATE:
                voters.add(user)
        return len(voters)
    except TwitterSearchException as e:
        print(e)
        return -1
def initialize_twitter_client(self):
    """Build the TwitterSearch client from this object's TWITTER_* settings."""
    cfg = self.settings
    self.twitter_client = TwitterSearch(
        consumer_key=cfg['TWITTER_CONSUMER_KEY'],
        consumer_secret=cfg['TWITTER_CONSUMER_SECRET'],
        access_token=cfg['TWITTER_ACCESS_TOKEN'],
        access_token_secret=cfg['TWITTER_ACCESS_SECRET'],
    )
def reload_api():
    """
    Reinitialize the API client with a new API key.

    This method may block if no valid keys are currently available.
    """
    global API
    # KEYS.advance() returns a newline-separated credential bundle; its lines
    # are passed positionally to TwitterSearch -- presumably in
    # (consumer_key, consumer_secret, access_token, access_token_secret)
    # order; confirm against the KEYS provider.
    API = TwitterSearch(*KEYS.advance().splitlines())
def init_tw_search_lib(self, domain_keyword):
    """ Init TwitterSearch Library (Copyright (C) 2013 Christian Koepp
    https://github.com/ckoepp/TwitterSearch/tree/master)

    Arguments:
        domain_keyword {str} -- The keyword from <domain_keywords_dict> that
        will be used to search in Twitter

    Returns:
        [TwitterSearch] -- TwitterSearch object with our secret tokens
        [TwitterSearchOrder] -- TwitterSearchOrder object with initialized
        attributes

    Raises:
        TwitterSearchException -- re-raised after logging, so the caller sees
        the real failure.
    """
    try:
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.add_keyword(domain_keyword)  # add keyword for search in Twitter
        tso.set_language('en')  # we want to see English tweets only
        tso.set_include_entities(False)  # skip the entity information
        # Create a TwitterSearch object with our secret tokens
        # (placeholders below must be replaced with real credentials).
        ts = TwitterSearch(
            consumer_key='<your-CONSUMER_KEY>',
            consumer_secret='<your-CONSUMER_SECRET>',
            access_token='<your-ACCESS_TOKEN>',
            access_token_secret='<your-ACCESS_TOKEN_SECRET>')
    except TwitterSearchException as e:
        print(e)
        # BUGFIX: execution previously fell through to `return ts, tso`,
        # raising UnboundLocalError and masking the real error.
        raise
    return ts, tso
def hello_world(keywords):
    """Search Twitter for *keywords* and return the matching tweet texts as
    JSON, with permissive CORS headers attached.

    On search failure the error is printed and an empty result list returned.
    """
    # BUGFIX: defined before the try so a failure during setup cannot make the
    # jsonify() call below raise NameError.
    tweets = []
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keywords])
        ts = TwitterSearch(keys)
        for tweet in ts.search_tweets_iterable(tso):
            tweets.append(tweet['text'])
    except TwitterSearchException as e:
        print(e)
    response = jsonify({'status': 200, 'results': tweets})
    # BUGFIX: the CORS headers were previously added to a throwaway
    # make_response() object that was then overwritten by jsonify(); attach
    # them to the response that is actually returned.
    response.headers.add("Access-Control-Allow-Origin", "*")
    response.headers.add("Access-Control-Allow-Headers", "*")
    response.headers.add("Access-Control-Allow-Methods", "*")
    return response
def setup(self):
    """Instantiate the TwitterSearch client from the configured OAuth
    credentials and reset the last-seen tweet id."""
    self._client = TwitterSearch(
        consumer_key=self._config['consumer_key'],
        consumer_secret=self._config['consumer_secret'],
        access_token=self._config['access_token'],
        access_token_secret=self._config['access_token_secret']
    )
    # Nothing polled yet -- presumably consumed as a since_id marker by the
    # polling code; confirm against the caller.
    self._last_id = None
def get(self, user_handle=None):
    """Return up to ?numTweets= (default 20) recent tweets for *user_handle*
    as a JSON payload; 400 when no handle is given, 404 on search failure."""
    from TwitterSearch import TwitterSearch, TwitterUserOrder, TwitterSearchException
    import itertools
    if user_handle is None:
        return jsonify({
            'response': [],
            'status': 400,
            'message': 'No handle provided'
        })
    try:
        user_profile = TwitterUserOrder(user_handle)
        # SECURITY(review): hard-coded API keys -- these should be rotated
        # and loaded from configuration, not committed to source.
        consumer = 'CedAugFXME85jW5MRraKTJFgO'
        consumer_secret = 'RjLOp02iZqQnGM5cOt4bBeFjFHtFyVW09NSH14rVEyPouFvWLs'
        access = '378294925-zdTFn1Gf8rcBzv6gshfjfONZG9ZSc8QFUlZd1YO8'
        access_secret = '0MV9lR9kFdoUkLnKoWgdZCl74vunMAoCR7INC7pQYrSfW'
        client = TwitterSearch(consumer_key=consumer,
                               consumer_secret=consumer_secret,
                               access_token=access,
                               access_token_secret=access_secret)
        # Lazily iterate the user's timeline and take only what was asked for.
        tweet_iterator = client.search_tweets_iterable(user_profile)
        num_tweets = int(request.args.get('numTweets', 20))
        resolved_tweets = list(itertools.islice(tweet_iterator, num_tweets))
        return jsonify({'response': resolved_tweets, 'status': 200})
    except TwitterSearchException as e:
        return jsonify({
            'response': [],
            'status': 404,
            'message':
            'There was a problem fetching the data for {}: {}'.format(
                user_handle, e)
        })
def __init__(self):
    """Set up the default search order (blank keyword, 100 results, no
    entities) and the authenticated API client."""
    self.properties = TwitterSearchOrder()
    self.properties.set_keywords([''])
    self.properties.set_count(100)
    self.properties.set_include_entities(False)
    twitter_cfg = DATASOURCES['twitter']
    self.api = TwitterSearch(
        consumer_key=twitter_cfg['api_key'],
        consumer_secret=twitter_cfg['api_secret'],
        access_token=twitter_cfg['access_token'],
        access_token_secret=twitter_cfg['access_token_secret'])
def fetch_twitter_entries(self):
    """Poll Twitter for up to 50 recent geocoded tweets for this origin's
    area, persisting each via save_tweets().

    Pages backwards with max_id; the updated max_id is always saved on the
    origin in the finally block, even when the search fails.
    """
    origin = self
    max_id = origin.max_id
    since_id = None
    area = origin.area
    try:
        count = 50
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.setKeywords([''])  # empty keyword: match anything in the geocoded area
        tso.setResultType('recent')
        if origin.max_id:
            tso.setMaxID(origin.max_id-1)  # as per twitter docs
        tso.setLanguage('en')  # we want to see English tweets only
        tso.setGeocode(latitude=area.lat, longitude=area.long, radius=area.rad, km=True)
        tso.setCount(count)  # results per page
        tso.setIncludeEntities(False)  # and don't give us all those entity information
        # it's about time to create a TwitterSearch object with our secret tokens
        ts = TwitterSearch( **settings.TWITTER )
        total = 0
        for tweet in ts.searchTweetsIterable(tso):  # this is where the fun actually starts :)
            # save_tweets presumably returns the tweet's id -- confirm; it is
            # reused as the next pagination cursor.
            max_id = save_tweets(area, origin, tweet)
            if not since_id:
                since_id = max_id
            #import ipdb;ipdb.set_trace()
            #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
            total += 1
            if total >= 50:
                break
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
    # except:
    #     pass
    finally:
        # Persist pagination state no matter what happened above.
        origin.max_id = max_id
        origin.save()
def getTweets(politician_id, searchOnlySexistWords):
    """Fetch English tweets mentioning a politician.

    With searchOnlySexistWords=True the search pairs each configured sexist
    word with each of the politician's names and returns a lazy iterable over
    all matches; otherwise only the names are searched and the first page
    (up to 100 statuses) is returned.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        politician_names = [
            politician.first_name + " " + politician.last_name,
            politician.username
        ]
        tso = TwitterSearchOrder()
        searchTerms = []
        if searchOnlySexistWords:
            for word in CONFIG['SEXISTWORDS']:
                for politician_name in politician_names:
                    searchTerms.append(word + ' ' + politician_name)
        elif searchOnlySexistWords is False:
            searchTerms = politician_names
        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)
        # Re-issue the built query with tweet_mode=extended so the API
        # returns untruncated full_text.
        tso.set_search_url(tso.create_search_url() + "&tweet_mode=extended")
        ts = TwitterSearch(consumer_key=CONFIG['CONSUMER_KEY'],
                           consumer_secret=CONFIG['CONSUMER_SECRET'],
                           access_token=CONFIG['ACCESS_TOKEN'],
                           access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'])
        print("**Processing tweets for " +
              str(politician.first_name + " " + politician.last_name) + "**")
        if searchOnlySexistWords:
            return ts.search_tweets_iterable(tso)
        # Single-page search (limited to 100) when scanning plain names.
        return ts.search_tweets(tso)['content']['statuses']
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
def getStats(self, url, proxy, headers, timeout):
    """returns (retweet + favorite count) count from twitter API , url is url
    that could be in a tweet, proxy is 'ip:port' in string, headers should
    contain user-agent in as an item in dictionary, timeout is maximum time
    while waiting for response and is an int"""
    order = TwitterSearchOrder()
    order.set_search_url('q=' + url)
    order.set_result_type(result_type='mixed')
    order.set_include_entities(False)
    order.set_count(100)
    client = TwitterSearch(consumer_key=self.ConsumerKey,
                           consumer_secret=self.ConsumerSecret,
                           access_token=self.AccessTokenKey,
                           access_token_secret=self.AccessTokenSecret,
                           proxy=proxy)
    # Sum engagement over every tweet that mentions the URL.
    return sum(tweet['retweet_count'] + tweet['favorite_count']
               for tweet in client.search_tweets_iterable(order))
def search(self):
    """Run the configured search, collecting and persisting each tweet's text."""
    try:
        order = TwitterSearchOrder()
        # NOTE(review): unpacking self.search_terms passes any second element
        # as set_keywords' or_operator argument -- confirm that is intended.
        order.set_keywords(*self.search_terms)
        order.set_include_entities(False)
        order.set_count(100)
        # SECURITY(review): hard-coded credentials; rotate and move to config.
        client = TwitterSearch(
            consumer_key='aOUVcCWLIYEbUvHW5dLjVc7Gf',
            consumer_secret='8qb3LTAHbj43J40Rxm0RMLAOaP4QoEHfFVGTeJ3S6iUmSBq6JJ',
            access_token='4251433696-ulZx8dJ3QZE95ds0PhXNldeKFhjhBUoGSuGycSE',
            access_token_secret='wx65NQaBHHgwC4xLOgRxFSs4kWWzkg09KkgNkAKHZryks'
        )
        for tweet in client.search_tweets_iterable(order):
            text = tweet['text']
            self.data.append(text)
            self.save_line(text)
        # self.save_data(self.data)
    except TwitterSearchException as exception:
        print(exception)
def setup(self):
    """Create the TwitterSearch client and validate the sensor configuration.

    Raises:
        ValueError -- when the configured "query" is not a list.
    """
    self._client = TwitterSearch(
        consumer_key=self._config['consumer_key'],
        consumer_secret=self._config['consumer_secret'],
        access_token=self._config['access_token'],
        access_token_secret=self._config['access_token_secret']
    )
    self._last_id = None
    # isinstance is the idiomatic type check (and accepts list subclasses).
    if not isinstance(self._config['query'], list):
        # BUGFIX: the old message used a backslash line-continuation inside
        # the string literal, embedding a long run of spaces in the log line.
        self._logger.exception(
            'Twitter sensor failed. "query" config value is not a list')
        raise ValueError('[TwitterSearchSensor]: "query" is not a list')
def get_search_api(config):
    """Return the module-level TwitterSearch client, creating it on first use
    from the [api] section of *config* (memoized in the _API global)."""
    global _API
    if _API:
        return _API
    api_cfg = config['api']
    _API = TwitterSearch(
        consumer_key=api_cfg['twitterconsumerkey'],
        consumer_secret=api_cfg['twitterconsumersecret'],
        access_token=api_cfg['twitteraccesstoken'],
        access_token_secret=api_cfg['twitteraccesstokensecret'])
    return _API
class TwitterService(object):
    """Thin wrapper around TwitterSearch for geocoded tweet lookups."""

    def __init__(self):
        self.properties = TwitterSearchOrder()
        self.properties.set_keywords([''])
        self.properties.set_count(100)
        self.properties.set_include_entities(False)
        credentials = DATASOURCES['twitter']
        self.api = TwitterSearch(
            consumer_key=credentials['api_key'],
            consumer_secret=credentials['api_secret'],
            access_token=credentials['access_token'],
            access_token_secret=credentials['access_token_secret'])

    def get_tweets(self, lat, lng):
        """Search within 1 km of (lat, lng) and return the raw API response."""
        self.properties.set_geocode(float(lat), float(lng), 1)
        return self.api.search_tweets(self.properties)
def getTweets(politician_id):
    """Search recent English tweets pairing known sexist words with each of a
    politician's names; returns a lazy iterable over the matches."""
    try:
        politician = Politician.objects.get(id=politician_id)
        politician_names = [
            politician.first_name + " " + politician.last_name,
            politician.last_name,
            politician.username
        ]
        print("Getting Tweets for " +
              str(politician.first_name + " " + politician.last_name))
        tso = TwitterSearchOrder()
        sexistWords = ['bitch', 'skank', 'rape']
        searchTerms = [word + ' ' + name
                       for word in sexistWords
                       for name in politician_names]
        tso.set_keywords(searchTerms, or_operator=True)
        print(searchTerms)
        tso.set_language("en")
        tso.set_include_entities(False)
        # Append tweet_mode=extended so the API returns untruncated full_text.
        tso.set_search_url(tso.create_search_url() + "&tweet_mode=extended")
        # Environment variables win over the CONFIG fallbacks.
        ts = TwitterSearch(
            consumer_key=os.environ.get('CONSUMER_KEY', CONFIG['CONSUMER_KEY']),
            consumer_secret=os.environ.get('CONSUMER_SECRET', CONFIG['CONSUMER_SECRET']),
            access_token=os.environ.get('ACCESS_TOKEN', CONFIG['ACCESS_TOKEN']),
            access_token_secret=os.environ.get('ACCESS_TOKEN_SECRET', CONFIG['ACCESS_TOKEN_SECRET'])
        )
        return ts.search_tweets_iterable(tso)
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))

# if __name__ == "__main__":
#     getTweets()
class TwitterScrape:
    """Methods to gather data from twitter searches"""

    def __init__(self):
        # Login to twitter handle using oauth
        # SECURITY(review): hard-coded OAuth credentials -- rotate them and
        # load from configuration instead of source.
        self.twitter = TwitterSearch(
            consumer_key='PYX15cyo7pBYyrny2kXomGf4N',
            consumer_secret='mCMtxofBFLtJv1GVRXeB9w0pw64ObRDPGmIZEGRo3uyl1oPVci',
            access_token='3369817647-TTV9HTaWAIbvrbpJwgXkVQtm0akEMSihl43No3P',
            access_token_secret='WjxjNW8YWmRSL65eIYYhQd4DsBSECO7wKpZpKsfLcR99q'
        )

    def search(self, query, lang='en', n=10**5):
        """
        Search twitter for specified query. Function returns n tweets or as
        many as can be found for that query.

        Parameters:
            query -- Search query (String)
            lang  -- Specify language of tweets, optional, default: 'en' (String)
            n     -- Number of tweets to return, optional, default: 10**5 (Int)

        Returns:
            tweets_out -- Pandas series of tweets of length at most n
        """
        # Initialise container
        tweets_out = []

        # Setup twitter search
        tso = TwitterSearchOrder()
        tso.set_keywords([query])
        tso.set_language(lang)
        tso.set_include_entities(False)

        # Begin search
        sys.stdout.write("Tweet number out of {0}: ".format(n))
        for i, tweet in enumerate(self.twitter.search_tweets_iterable(tso)):
            # Break from loop when n tweets are reached
            if i == n:
                break

            # Output progress every 100 tweets
            if i % 100 == 0:
                sys.stdout.write('{0} '.format(i))
                sys.stdout.flush()

            # Add the next tweet to the container
            tweets_out.append('%s' % (tweet['text']))
        # NOTE(review): bare `print` is a Python 2 statement (emits a
        # newline); under Python 3 it is a no-op expression referencing the
        # builtin -- confirm which interpreter this module targets.
        print

        # Return as pandas series as it's easier to work with
        return pd.Series(tweets_out)
class TwitterService(object):
    """Queries Twitter for tweets around a geographic point."""

    def __init__(self):
        props = TwitterSearchOrder()
        props.set_keywords([''])
        props.set_count(100)
        props.set_include_entities(False)
        self.properties = props
        self.api = TwitterSearch(
            consumer_key=DATASOURCES['twitter']['api_key'],
            consumer_secret=DATASOURCES['twitter']['api_secret'],
            access_token=DATASOURCES['twitter']['access_token'],
            access_token_secret=DATASOURCES['twitter']['access_token_secret'])

    def get_tweets(self, lat, lng):
        """Return tweets posted within 1 km of the given coordinates."""
        self.properties.set_geocode(float(lat), float(lng), 1)
        response = self.api.search_tweets(self.properties)
        return response
def authenticate_twitter():
    """Build a TwitterSearch client from the four encrypted SSM parameters."""

    def secret(name):
        # Fetch and decrypt a single credential from SSM Parameter Store.
        return ssm.get_parameter(Name=name,
                                 WithDecryption=True)['Parameter']['Value']

    return TwitterSearch(
        consumer_key=secret('twitter-consumer-key'),
        consumer_secret=secret('twitter-consumer-secret'),
        access_token=secret('twitter-access-token'),
        access_token_secret=secret('twitter-access-token-secret'))
class TwitterReviews(object):
    """Context-managed fetcher of movie-review tweets ('#<movie>Movie')."""

    def __init__(self, movie, limit=10, language='en'):
        """
        Parameters:
            movie -- movie name; searched as '#<movie>Movie'
            limit -- maximum number of reviews exposed (default 10)
            language -- tweet language filter (default 'en')
        """
        self.movie = movie
        # BUGFIX: was hard-coded `self.limit = 10`, silently ignoring the
        # caller-supplied `limit` argument.
        self.limit = limit
        self.language = language
        self.client = TwitterSearch(
            consumer_key=os.environ.get('TWITTER_CONSUMER_KEY'),
            consumer_secret=os.environ.get('TWITTER_CONSUMER_SECRET'),
            access_token=os.environ.get('TWITTER_ACCESS_TOKEN'),
            access_token_secret=os.environ.get('TWITTER_TOKEN_SECRET')
        )

    def __enter__(self):
        self.client.connect()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Log search failures but do not suppress them (implicitly returns None).
        if exc_type == TwitterSearchException:
            logging.exception(str(exc_val))
        self.client.cleanUp()
        self.client.disconnect()

    @property
    def reviews(self):
        """Reviews built from the raw search results, capped at self.limit."""
        return Reviews(self._get_results(), limit=self.limit)

    def _prepare_request(self):
        # Build the search order for this movie's hashtag.
        tso = TwitterSearchOrder()
        tso.setKeywords(self._get_keywords())
        tso.setLanguage(self.language)
        tso.setIncludeEntities(False)
        return tso

    def _get_keywords(self):
        return ['#' + self.movie + 'Movie']

    def _get_results(self):
        request = self._prepare_request()
        return self.client.getSearchResults(request)
def collect_tweets(keyword, count, force=False):
    """Collect up to *count* new tweets matching *keyword*, run Alchemy
    sentiment analysis on each, and insert Caliper-style event documents
    into a MongoDB collection named after the keyword.

    keyword -- search term; a leading '.' is treated as '#'
    count   -- maximum number of documents to insert this run
    force   -- when True, keep going with placeholder sentiment values after
               the Alchemy API daily limit is exceeded
    """
    from TwitterSearch import TwitterSearch
    from TwitterSearch import TwitterSearchOrder
    import pymongo
    from dateutil.parser import parse
    from alchemyapi import AlchemyAPI
    import ConfigParser

    # try:
    #     keyword = sys.argv[1]
    #     count = int(sys.argv[2])
    # except IndexError:
    #     e_too_few_args = "You did not enter enough arguments. Two are required: keyword, and count"
    #     raise Exception(e_too_few_args)
    # try:
    #     if sys.argv[3] == '-f':
    #         force = True
    #     else:
    #         e_invalid_argument = "The only option available is -f. It is used to force the script to continue when the Alchemy API limit is exceeded."
    #         raise Exception(e_invalid_argument)
    # except IndexError:
    #     force = False

    # Read the config file for config variables
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')
    mongo_url = config.get('Mongo', 'db_url')

    # Connect to the Mongo database using MongoClient
    client = pymongo.MongoClient(mongo_url)
    db = client.get_default_database()

    # Access/create the collection based on the command line argument
    tweets = db[keyword]

    # Generate the alchemyapi variable
    alchemyapi = AlchemyAPI()

    # To accommodate for hashtags the user can substitute a . for the # in the
    # command line; the lines below return it to a hashtag for the search.
    # NOTE(review): `is` compares identity, not equality -- this relies on
    # CPython small-string interning and should be `==`.
    if keyword[0] is ".":
        keyword = keyword.replace('.', '#')

    # Avoid duplicate work: count how many documents exist in the collection.
    db_count = tweets.count()
    # If there are documents in the collection, the collection is queried,
    # tweet objects are sorted by date, and the tweet_id of the most recent
    # tweet is retrieved and later set as the "since_id".
    # NOTE(review): `is not 0` is an identity check; should be `!= 0`.
    if db_count is not 0:
        latest_id = tweets.find( {}, { 'object.tweet_id':1 } ).sort("startedAtTime").limit(1)
        latest_id_str = latest_id[db_count-1]['object']['tweet_id']
        latest_id_int = int(latest_id_str)
        print 'Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.'
    # If there are no documents in the collection, no queries are done, and
    # the since_id is left out of the API call.
    else:
        print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'

    # create a TwitterSearchOrder object
    tso = TwitterSearchOrder()

    # let's define all words we would like to have a look for
    tso.set_keywords([keyword])

    # Select language
    tso.set_language('en')

    # Include Entity information
    tso.set_include_entities(True)

    if db_count is not 0:
        tso.set_since_id(latest_id_int)
        print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
    else:
        print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'

    # Create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key = config.get('Twitter', 'consumer_key'),
        consumer_secret = config.get('Twitter', 'consumer_secret'),
        access_token = config.get('Twitter', 'access_token'),
        access_token_secret = config.get('Twitter', 'access_token_secret')
    )

    # Perform the search
    twitter_search = ts.search_tweets_iterable(tso)

    # Start the insert count variable
    db_inserts = 0

    # this is where the fun actually starts :)
    try:
        for tweet in twitter_search:
            if db_inserts < count:
                mentions_list = []
                hashtags_list = []
                # Create the caliper_tweet object
                caliper_tweet = {
                    "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
                    "type": "MessagingEvent",
                    "startedAtTime": "",
                    ## Can be used to query Twitter API for user information
                    "actor": "",
                    "verb": "tweetSent",
                    "object": {
                        "type": "MessagingEvent",
                        "tweet_id": "",
                        "tweet_uri": "",
                        "subtype": "tweet",
                        ## "to" should be calculated by checking in_reply_to_user_id_str is null. If it's not null, then it should be concatenated to "uri:twitter/user/" and stored in "object"['to']
                        "to": "",
                        "author": {
                            "author_uri": "",
                            "author_alias": "",
                            "author_name": "",
                        },
                        "text": "",
                        "sentiment": {
                            "type": "",
                            "score": "",
                            "color": ""
                        },
                        "parent": "",
                        ## "mentions" is an array of the caliper IDs from the user_mentions objects array
                        "user_mentions": [],
                        ## "hashtags" is an array of the hashtag texts included in the tweet entities
                        "hashtags": []
                    }
                }
                # Set the re-usable variables
                tweet_text = tweet['text']
                ## AlchemyAPI Sentiment Analysis
                tweet_sentiment = ''
                response = alchemyapi.sentiment('text', tweet_text)
                if 'docSentiment' in response.keys():
                    if 'score' in response['docSentiment']:
                        tweet_sentiment_score = response['docSentiment']['score']
                        tweet_sentiment_score = float(tweet_sentiment_score)
                        tweet_sentiment_score = round(tweet_sentiment_score, 2)
                    else:
                        # Neutral sentiment carries no score field.
                        tweet_sentiment_score = 0
                    tweet_sentiment_type = response['docSentiment']['type']
                    tweet_sentiment_score_a = abs(tweet_sentiment_score)
                    # Green for positive, red for negative; alpha scales with magnitude.
                    if (tweet_sentiment_score) > 0:
                        tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")"
                    else:
                        tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")"
                elif force == True:
                    print 'Force option set to true. The tweet_sentiment object will be set with API Limit Exceeded values.'
                    tweet_sentiment_type = 'API Limit Exceeded'
                    tweet_sentiment_score = 0
                    tweet_sentiment_color = 'rgba(0,0,0,0)'
                else:
                    e_alchemy_api_limit = 'Alchemy API daily limit exceeded. Retry search with force=True to continue'
                    raise Exception(e_alchemy_api_limit)
                ds = tweet['created_at']
                tweet_date = parse(ds)
                caliper_tweet['startedAtTime'] = tweet_date
                caliper_tweet['actor'] = 'student:' + tweet['user']['screen_name']
                caliper_tweet['object']['tweet_uri'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['id_str']
                caliper_tweet['object']['tweet_id'] = tweet['id_str']
                if tweet['in_reply_to_user_id_str'] is None:
                    caliper_tweet['object']['to'] = 'NoReply'
                    caliper_tweet['object']['parent'] = 'NoReply'
                else:
                    caliper_tweet['object']['to'] = 'https://twitter.com/intent/user?user_id=' + tweet['in_reply_to_user_id_str']
                    if tweet['in_reply_to_status_id_str'] is None:
                        caliper_tweet['object']['parent'] = 'None'
                    else:
                        caliper_tweet['object']['parent'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['in_reply_to_status_id_str']
                caliper_tweet['object']['author']['author_uri'] = 'https://twitter.com/intent/user?user_id=' + tweet['user']['id_str']
                caliper_tweet['object']['author']['author_alias'] = tweet['user']['screen_name']
                caliper_tweet['object']['author']['author_name'] = tweet['user']['name']
                caliper_tweet['object']['text'] = unicode(tweet['text'])
                caliper_tweet['object']['sentiment']['type'] = tweet_sentiment_type
                caliper_tweet['object']['sentiment']['score'] = tweet_sentiment_score
                caliper_tweet['object']['sentiment']['color'] = tweet_sentiment_color
                for x in list(tweet['entities']['hashtags']):
                    hashtag = x['text']
                    hashtags_list.append(hashtag)
                for x in list(tweet['entities']['user_mentions']):
                    mention = x['id_str']
                    mentions_list.append(mention)
                caliper_tweet['object']['user_mentions'] = mentions_list
                caliper_tweet['object']['hashtags'] = hashtags_list
                tweets.insert(caliper_tweet)
                db_inserts = db_inserts + 1
            else:
                # Reached the requested count; exit the loop via StopIteration.
                raise StopIteration
    except StopIteration:
        print str(db_inserts) + " inserts made in the " + keyword + " collection."
cities = f.read().splitlines() # read all lines into cities for city in cities: try: tso = TwitterSearchOrder() tso.set_keywords(['#travel', '#holiday', '#vacation'], or_operator=True) tso.add_keyword( city) # add the city name as one of the search keywords tso.set_language('en') # we want to see English tweets only tso.set_count(100) # search for 100 pages of tweets tso.set_result_type( 'mixed') # search for both the popular and real-time tweets tso.set_include_entities(False) ts = TwitterSearch(consumer_key='aaabbb', consumer_secret='cccddd', access_token='112233', access_token_secret='445566') def my_callback_closure( current_ts_instance ): # accepts ONE argument: an instance of TwitterSearch queries, tweets_seen = current_ts_instance.get_statistics() if queries > 0 and (queries % 5) == 0: # trigger delay every 5th query time.sleep(60) # sleep for 60 seconds filename = city + '.txt' f = open('./result/' + filename, 'w') for tweet in ts.search_tweets_iterable(tso, callback=my_callback_closure): if 'RT @' not in tweet['text']:
def Tweets():
    """Collect recent tweets for every company listed in the module-global
    ``MainDF`` and store them as DataFrames in the module-global
    ``tweets_datasets`` dict (one entry per company).

    For each company two searches are run:
      1. a keyword search on ``MainDF['hashtag'][c]`` (retweets skipped);
      2. the company's own timeline ``MainDF['twitter'][c]`` (non-English
         tweets skipped, capped at ``max_feeds`` entries).

    Side effects (no return value):
      - ``tweets_datasets[company]`` gets a DataFrame with columns
        Category / Company / Source/User / Title/Tweet / Date / Link;
      - companies that produced at least one tweet are appended to the
        module-global ``tw_current_companies``.

    Relies on module globals: MainDF, tweets_datasets, tw_current_companies,
    new_date, Find, parse (dateutil), pd, and the TwitterSearch classes.
    NOTE(review): credentials are empty strings and the proxy is a
    placeholder — presumably filled in elsewhere before use; confirm.
    """
    try:
        max_feeds = 10  # cap on tweets taken from each company's own timeline
        tso = TwitterSearchOrder()  # keyword-search order, reused across companies
        tso.set_language('en')
        tso.set_include_entities(
            False)  # and don't give us all those entity information
        tso.set_until(new_date)  # NOTE(review): new_date is a module global — confirm it is set before calling
        # Request the full (untruncated) tweet text.
        tso.arguments.update({'tweet_mode': 'extended'})
        tso.arguments.update({'truncated': 'False'})
        ts = TwitterSearch(consumer_key='',
                           consumer_secret='',
                           access_token='',
                           access_token_secret='',
                           proxy='http://proxy_address')
        for c in range(len(MainDF)):
            count = 0  # number of timeline tweets kept for this company
            #kw=[MainDF['twitter'][c]]
            #for h in MainDF['hashtag'][c]:
            #    kw.append(h)
            tso.set_keywords(MainDF['hashtag'][c])
            tweets_list = []  # rows: [company, screen_name, text, created_at, links]
            tuo = TwitterUserOrder(MainDF['twitter'][c])  # this company's timeline
            # tuo.set_language('en')
            tuo.set_include_entities(
                False)  # and don't give us all those entity information
            # tuo.set_until(days_ago)
            # tuo.set_count(15)
            tuo.arguments.update({'tweet_mode': 'extended'})
            tuo.arguments.update({'truncated': 'False'})
            #for tweet in ts.search_tweets_iterable(tso):
            #    print(tweet)
            #    tweets_list.append([tweet['user']['screen_name'],tweet['full_text']])
            # Pass 1: keyword search; retweets are deliberately dropped.
            for tweet in ts.search_tweets_iterable(tso):
                if 'retweeted_status' in tweet:
                    None  # retweet: skip (placeholder no-op, not `pass`)
                    #tweets_list.append([tweet['user']['screen_name'],tweet['retweeted_status']['full_text'],'Retweet of ' + tweet['retweeted_status']['user']['screen_name']])
                else:
                    links = Find(tweet['full_text'])  # NOTE(review): Find presumably extracts URLs — confirm
                    links = ', '.join(link for link in links)
                    #print(tweet)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
            # Pass 2: the company's own timeline, English only, up to max_feeds.
            for tweet in ts.search_tweets_iterable(tuo):
                if tweet['lang'] != 'en':
                    #print(tweet)
                    None  # non-English: skip
                else:
                    # print(tweet)
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
                # NOTE(review): counter sits at loop level, so skipped
                # non-English tweets also consume the max_feeds budget —
                # confirm that is intended.
                count = count + 1
                if count == max_feeds:
                    break
            if tweets_list != []:
                # Materialise this company's rows as a DataFrame and label them.
                tweets_datasets[MainDF['company'][c]] = pd.DataFrame(
                    tweets_list)
                tweets_datasets[MainDF['company'][c]].columns = [
                    'Company', 'Source/User', 'Title/Tweet', 'Date', 'Link'
                ]
                tweets_datasets[MainDF['company'][c]].insert(
                    0, 'Category', 'Twitter')
                # Normalise Twitter's created_at strings to plain dates.
                for i in range(
                        len(tweets_datasets[MainDF['company'][c]]['Date'])):
                    tweets_datasets[MainDF['company'][c]]['Date'][i] = parse(
                        tweets_datasets[MainDF['company'][c]]['Date'][i])
                    tweets_datasets[
                        MainDF['company'][c]]['Date'][i] = tweets_datasets[
                            MainDF['company'][c]]['Date'][i].date()
                #print(datasets[companies_names[count]])
                tw_current_companies.append(MainDF['company'][c])
            else:
                None  # no tweets for this company: leave tweets_datasets untouched
                #tweets_list.append()
        #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
    except TwitterSearchException as e:  # take care of all those ugly errors if there are some
        print(e)
class TwitterSearchSensor(PollingSensor):
    """StackStorm polling sensor that searches Twitter for a configured query
    and dispatches a ``twitter.matched_tweet`` trigger for every new match.

    Config keys used: consumer_key, consumer_secret, access_token,
    access_token_secret, query (list of keywords), and optionally
    language and count.
    """

    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(sensor_service=sensor_service,
                                                  config=config,
                                                  poll_interval=poll_interval)
        self._trigger_ref = 'twitter.matched_tweet'
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        """Build the API client once; every poll() cycle reuses it."""
        self._client = TwitterSearch(
            consumer_key=self._config['consumer_key'],
            consumer_secret=self._config['consumer_secret'],
            access_token=self._config['access_token'],
            access_token_secret=self._config['access_token_secret']
        )
        self._last_id = None

        # Fail fast on misconfiguration: "query" must be a list of keywords.
        # FIX: use isinstance instead of `type(...) is not list`, and build
        # the log message with implicit concatenation instead of a
        # backslash-continued string literal (the old form embedded a long
        # run of indentation spaces inside the logged message).
        if not isinstance(self._config['query'], list):
            self._logger.exception('Twitter sensor failed. "query" config '
                                   'value is not a list')
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        """Run one search cycle and dispatch a trigger per new tweet."""
        tso = TwitterSearchOrder()
        # Second argument enables OR-combination of the configured keywords.
        tso.set_keywords(self._config['query'], True)
        language = self._config.get('language', None)
        if language:
            tso.set_language(language)
        tso.set_result_type('recent')
        tso.set_count(self._config.get('count', 30))
        tso.set_include_entities(False)

        # Only fetch tweets newer than the last one we have already seen.
        last_id = self._get_last_id()
        if last_id:
            tso.set_since_id(int(last_id))

        try:
            tweets = self._client.search_tweets(tso)
            tweets = tweets['content']['statuses']
        except Exception as e:
            # Polling is best-effort; log and try again on the next cycle.
            self._logger.exception('Polling Twitter failed: %s' % (str(e)))
            return

        # API returns newest-first; reverse so triggers fire chronologically.
        tweets = list(reversed(tweets))

        if tweets:
            self._set_last_id(last_id=tweets[-1]['id'])

        for tweet in tweets:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        # Nothing to release: the client holds no persistent connection.
        pass

    def add_trigger(self, trigger):
        # Trigger types are static for this sensor.
        pass

    def update_trigger(self, trigger):
        pass

    def remove_trigger(self, trigger):
        pass

    def _get_last_id(self):
        """Return the id of the newest tweet seen, consulting the datastore
        on first use so state survives sensor restarts."""
        if not self._last_id and hasattr(self._sensor_service, 'get_value'):
            self._last_id = self._sensor_service.get_value(name='last_id')
        return self._last_id

    def _set_last_id(self, last_id):
        """Remember the newest tweet id, persisting it when the datastore
        API is available."""
        self._last_id = last_id

        if hasattr(self._sensor_service, 'set_value'):
            self._sensor_service.set_value(name='last_id', value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        """Flatten the interesting fields of one tweet into a trigger payload."""
        trigger = self._trigger_ref
        url = '%s/%s/status/%s' % (BASE_URL, tweet['user']['screen_name'],
                                   tweet['id'])
        payload = {
            'id': tweet['id'],
            'created_at': tweet['created_at'],
            'lang': tweet['lang'],
            'place': tweet['place'],
            'retweet_count': tweet['retweet_count'],
            'favorite_count': tweet['favorite_count'],
            'user': {
                'screen_name': tweet['user']['screen_name'],
                'name': tweet['user']['name'],
                'location': tweet['user']['location'],
                'description': tweet['user']['description'],
            },
            'text': tweet['text'],
            'url': url
        }
        self._sensor_service.dispatch(trigger=trigger, payload=payload)
import json from TwitterSearch import TwitterSearch, TwitterSearchException, TwitterUserOrder from watson_developer_cloud import AlchemyLanguageV1 alchemy_language = AlchemyLanguageV1( api_key='a04bf0cda38fd380a2e89b9b54d6076729b568ce') # def getTweets(place): try: place = raw_input("Enter a twitter handle: ") tuo = TwitterUserOrder(place) # create a TwitterUserOrder # it's about time to create TwitterSearch object again ts = TwitterSearch( consumer_key='jP53etLOQHrdCtMc4j2Djas2z', consumer_secret='9UmpzmT1IPF6JuNzODHOyXZU19Vv1C0eYOQraQLwY04jAMGpu4', access_token='746046118652416000-BZC8oHZZ75dJe8Q8fGlMigNvKy6kVwK', access_token_secret='Nfl6UpuUUdvSy60tN6p7l3l1W0GOGKpQoIbqZg78cdrtd') def my_callback_closure( current_ts_instance ): # accepts ONE argument: an instance of TwitterSearch queries, tweets_seen = current_ts_instance.get_statistics() # if queries > 0 and (queries % 60) == 0: # trigger delay every 5th query # time.sleep(30) # sleep for 60 seconds tweetArray = [] # start asking Twitter about the timeline for tweet in ts.search_tweets_iterable(tuo, callback=my_callback_closure): # tweetArray.append(tweet['text']) # if 'accessible' in tweet['text']:
class TwitterSearchSensor(PollingSensor):
    """Polling sensor: searches Twitter with a configured query and fires a
    ``twitter.matched_tweet`` trigger for each tweet not seen before."""

    def __init__(self, sensor_service, config=None, poll_interval=None):
        super(TwitterSearchSensor, self).__init__(
            sensor_service=sensor_service,
            config=config,
            poll_interval=poll_interval,
        )
        self._trigger_ref = "twitter.matched_tweet"
        self._logger = self._sensor_service.get_logger(__name__)

    def setup(self):
        # A single client instance serves every subsequent poll() cycle.
        cfg = self._config
        self._client = TwitterSearch(
            consumer_key=cfg["consumer_key"],
            consumer_secret=cfg["consumer_secret"],
            access_token=cfg["access_token"],
            access_token_secret=cfg["access_token_secret"],
        )
        self._last_id = None

        # Guard: the query must be a list of keywords.
        if type(cfg["query"]) is not list:
            self._logger.exception(
                'Twitter sensor failed. "query" config \
                                   value is not a list'
            )
            raise ValueError('[TwitterSearchSensor]: "query" is not a list')

    def poll(self):
        cfg = self._config
        tso = TwitterSearchOrder()
        tso.set_keywords(cfg["query"])

        language = cfg.get("language", None)
        if language:
            tso.set_language(language)

        tso.set_result_type("recent")
        tso.set_count(cfg.get("count", 30))
        tso.set_include_entities(False)

        # Pick up where the previous cycle left off, when we have state.
        last_id = self._get_last_id()
        if last_id:
            tso.set_since_id(int(last_id))

        try:
            response = self._client.search_tweets(tso)
            statuses = response["content"]["statuses"]
        except Exception as e:
            # Best-effort polling: log the failure and wait for the next cycle.
            self._logger.exception("Polling Twitter failed: %s" % (str(e)))
            return

        # The API yields newest-first; flip so triggers fire in order.
        statuses = statuses[::-1]

        if statuses:
            self._set_last_id(last_id=statuses[-1]["id"])

        for tweet in statuses:
            self._dispatch_trigger_for_tweet(tweet=tweet)

    def cleanup(self):
        pass

    def add_trigger(self, trigger):
        pass

    def update_trigger(self, trigger):
        pass

    def remove_trigger(self, trigger):
        pass

    def _get_last_id(self):
        # Lazily hydrate from the datastore the first time we are asked.
        datastore_readable = hasattr(self._sensor_service, "get_value")
        if not self._last_id and datastore_readable:
            self._last_id = self._sensor_service.get_value(name="last_id")
        return self._last_id

    def _set_last_id(self, last_id):
        self._last_id = last_id
        # Persist when the datastore API is present so restarts resume cleanly.
        if hasattr(self._sensor_service, "set_value"):
            self._sensor_service.set_value(name="last_id", value=last_id)

    def _dispatch_trigger_for_tweet(self, tweet):
        # Flatten the fields consumers care about into one payload dict.
        user = tweet["user"]
        url = "%s/%s/status/%s" % (BASE_URL, user["screen_name"], tweet["id"])
        payload = {
            "id": tweet["id"],
            "created_at": tweet["created_at"],
            "lang": tweet["lang"],
            "place": tweet["place"],
            "retweet_count": tweet["retweet_count"],
            "favorite_count": tweet["favorite_count"],
            "user": {
                "screen_name": user["screen_name"],
                "name": user["name"],
                "location": user["location"],
                "description": user["description"],
            },
            "text": tweet["text"],
            "url": url,
        }
        self._sensor_service.dispatch(trigger=self._trigger_ref, payload=payload)
import json import sys BRAND = sys.argv[1] COUCH_DATABASE_NAME = 'mt-twitter-' + BRAND TWITTER_SEARCH_KEYWORDS = [BRAND] TWITTER_CREDENTIALS = json.loads( Path(__file__).joinpath('..', '..', 'twitter.cfg.json').abspath().bytes()) # Establish connection to CouchDB and select the database to write into. # The database must already exist; create it manually in the CouchDB control # panel first. database = couchdb.Server()[COUCH_DATABASE_NAME] # Setup a twitter connection and configure its credentials: twitter_connection = TwitterSearch(**TWITTER_CREDENTIALS) # The twitter client may stop iterating the tweets at some point. # In order to automatically continue at the last position, we put the # import in a "while"-loop which will be stopped when there are no new # tweets to import. while True: # First, let's build a search query: twitter_query = TwitterSearchOrder() twitter_query.set_keywords(TWITTER_SEARCH_KEYWORDS) # Only import english tweets as our sentiment analysis will only work # with the English language for now. twitter_query.set_language('en') # We do not require entities (e.g. extracted URLs) as we are only # interested in the raw text of the tweet. twitter_query.set_include_entities(False)
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

# One authenticated client for the whole script; credentials come from CREDS.
ts = TwitterSearch(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)

try:
    tso = TwitterSearchOrder()
    # or_operator=True: match tweets containing EITHER keyword.
    tso.set_keywords(['surveymonkey', 'docs.google.com/forms'], or_operator=True)

    for tweet in ts.search_tweets_iterable(tso):
        author = tweet['user']['screen_name']
        print('@%s tweeted: %s' % (author, tweet['text']))
except TwitterSearchException as e:
    # The library signals rate limits and API errors via this exception.
    print(e)
tso.set_language('en') # Include Entity information tso.set_include_entities(True) if db_count is not 0: tso.set_since_id(latest_id_int) print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.' else: print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.' # Create a TwitterSearch object with our secret tokens ts = TwitterSearch( consumer_key = config.get('Twitter', 'consumer_key'), consumer_secret = config.get('Twitter', 'consumer_secret'), access_token = config.get('Twitter', 'access_token'), access_token_secret = config.get('Twitter', 'access_token_secret') ) # Perform the search twitter_search = ts.search_tweets_iterable(tso) # Start the insert count variable db_inserts = 0 # this is where the fun actually starts :) try: for tweet in twitter_search: if db_inserts < count: mentions_list = [] hashtags_list = []
def twitter_search(params, start_time):
    """
    Retrieves most recent tweets since yesterday based on keywords.

    Retrieves as many tweets as the api gives, up to the maximum set by
    max_tweets.

    :param params: The keywords to search for, formatted as list of strings.
        To search for a url, use this syntax: "url:\"gizmodo com\"" in which
        the domain is separated by spaces instead of dots and the internal
        quotes are escaped with backspaces.
    :param start_time: opaque timestamp forwarded into each make_dict result.
    :return: Returns list of dicts containing:
        - tweets: the number of tweets, since yesterday, about the specified
          keywords (up to a maximum count of max_tweets)
        - tweets_followers: the number of (unique) followers of those tweets
          (i.e., if the same person tweets ten times in one day, that
          person's followers are counted once, not ten times).
        - most_followed_name: the name of the tweeter who tweeted in 'tweets'
          (above) who has the most followers
        - most_followed_count: the count of the number of followers who
          follow the tweeter with the most followers
    """
    print('starting twitter_search')

    # Set up flow control variables.
    max_tweets = 10000       # maximum number of tweets to retrieve from api
    more_tweets = True       # are there more tweets to retrieve?
    need_to_sleep = False    # tells to sleep (if approaching api rate limit)
    error = 'ok'

    # FIX: counters live OUTSIDE the try block. Previously they were
    # initialised inside it, after the TwitterSearch constructor - so an
    # exception raised while building the client left max_followers (and
    # friends) undefined and the result-building code below crashed with a
    # NameError instead of reporting the error status.
    tweets = 0               # count of tweets about keywords, since yesterday
    unique_tweeters = {}     # dict of unique tweeters about keywords
    tweets_followers = 0     # count of followers of unique_tweeters
    min_id = 0               # next tweet for paginated results
    max_followers = (0, 'null')  # the tweeter with the most followers

    try:
        # create TwitterSearch object using this app's tokens.
        ts = TwitterSearch(
            consumer_key=tw.CONSUMER_KEY,
            consumer_secret=tw.CONSUMER_SECRET,
            access_token=tw.ACCESS_TOKEN,
            access_token_secret=tw.ACCESS_TOKEN_SECRET
        )

        # Create a TwitterSearchOrder object and add keywords to it.
        tso = TwitterSearchOrder()
        for param in params:
            tso.add_keyword(param)

        # Only search for tweets since yesterday (in UTC).
        yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(1)
        tso.set_since(yesterday)

        # Keep calling the api (for paginated results) until there are no
        # more tweets to retrieve, or until max_tweets has been reached.
        while more_tweets and tweets < max_tweets:
            # Sleep for 60 seconds, if needed, to avoid hitting api limit.
            # (need_to_sleep is only True after rate_limit has been set below.)
            if need_to_sleep:
                print("rate limit:", rate_limit)
                time.sleep(60)

            # Call the search api.
            response = ts.search_tweets(tso)

            if len(response["content"]["statuses"]) == 0:
                # No more tweets to retrieve.
                more_tweets = False
            else:
                # Count each tweet in this batch and track unique tweeters.
                for tweet in response["content"]["statuses"]:
                    # FIX: was `tweets > max_tweets`, which let the count
                    # reach max_tweets + 1 before breaking (off-by-one).
                    if tweets >= max_tweets:
                        break
                    tweets += 1
                    if (min_id == 0) or (tweet["id"] < min_id):
                        # The api returns tweets newest-first, so min_id is a
                        # lowering "ceiling": the starting point for the next
                        # paginated api call.
                        min_id = tweet["id"]
                    if tweet["user"]["screen_name"] not in unique_tweeters:
                        # First sighting of this screen_name: record their
                        # follower count exactly once.
                        tweeter = tweet["user"]["screen_name"]
                        tweeters_followers = tweet["user"]["followers_count"]
                        unique_tweeters[tweeter] = tweeters_followers

                # Set the next paginated result's start point (subtract one
                # to avoid retrieving the last tweet of this batch twice).
                tso.set_max_id(min_id - 1)

            # If fewer than 15 api calls remain, sleep during the next loop.
            # (Search api free tier allows 180 calls per 15 minute period.)
            rate_limit = int(ts.get_metadata()["x-rate-limit-remaining"])
            need_to_sleep = rate_limit < 15

        # After retrieval, aggregate follower metrics over unique_tweeters.
        for tweeter in unique_tweeters:
            tweets_followers += unique_tweeters[tweeter]
            if unique_tweeters[tweeter] > max_followers[0]:
                max_followers = (unique_tweeters[tweeter], tweeter)

    except TwitterSearchException as e:
        tweets = None
        tweets_followers = None
        # FIX: pass the actual exception type to format_exception instead of
        # the hard-coded (and wrong) ValueError.
        error = format_exception(type(e), e, e.__traceback__)

    tweets = make_dict(
        value=tweets,
        data_name='tweets',
        start_time=start_time,
        status=error
    )
    tweets_followers = make_dict(
        value=tweets_followers,
        data_name='tweets_followers',
        start_time=start_time,
        status=error
    )
    most_followed_name = make_dict(
        value=escape(max_followers[1], True),
        data_name='most_followed_name',
        start_time=start_time,
        status=error
    )
    most_followed_count = make_dict(
        value=max_followers[0],
        data_name='most_followed_count',
        start_time=start_time,
        status=error
    )
    return [tweets, tweets_followers, most_followed_name, most_followed_count]