def SearchOnTwitter(keywords, language):
    """Search Twitter for tweets matching *keywords* in *language*.

    Parameters:
        keywords: list of strings the tweets must contain
        language: language code restricting the search (e.g. 'de', 'en')

    Returns:
        list of tweet texts; partial (possibly empty) if the search failed
    """
    collected = []
    try:
        # Build the search order: keywords, language filter, no entity payload.
        order = TwitterSearchOrder()
        order.set_keywords(keywords)
        order.set_language(language)
        order.set_include_entities(False)

        # NOTE(review): OAuth credentials are read from module-level globals.
        client = TwitterSearch(consumer_key=consumer_key,
                               consumer_secret=consumer_secret,
                               access_token=access_token,
                               access_token_secret=access_token_secret)

        # Append incrementally so tweets gathered before a mid-stream API
        # error are still returned.
        for status in client.search_tweets_iterable(order):
            collected.append(status['text'])
    except TwitterSearchException as err:
        # The library raises on HTTP/auth/rate-limit problems; report and
        # fall through with whatever was collected so far.
        print(err)
    return collected
def coleta_tweets():
    """Collect Portuguese 'Harry potter' tweets into a pandas DataFrame.

    Returns a single-column DataFrame of formatted tweet strings, or None
    when the TwitterSearch library raises an error.
    """
    try:
        # Client built with empty credential placeholders (to be filled in).
        client = TwitterSearch(consumer_key='',
                               consumer_secret='',
                               access_token='',
                               access_token_secret='')

        order = TwitterSearchOrder()
        order.set_keywords(['Harry potter'])
        order.set_language('pt')

        rows = []
        for status in client.search_tweets_iterable(order):
            # Same "@user tweeted: text," formatting as before
            # (trailing comma preserved).
            rows.append('@%s tweeted: %s' % (status['user']['screen_name'],
                                             status['text']) + ',')

        print('Coleta finalizada!')
        #df.to_csv('tweets.txt')
        return pd.DataFrame(rows)
    except TwitterSearchException as err:
        print(err)
def coleta_tweets():
    """Fetch Portuguese 'Harry potter' tweets and wrap them in a DataFrame.

    Returns the DataFrame on success; returns None after printing the error
    when the Twitter API call fails.
    """
    try:
        api = TwitterSearch(
            consumer_key='',
            consumer_secret='',
            access_token='',
            access_token_secret='',
        )
        consulta = TwitterSearchOrder()
        consulta.set_keywords(['Harry potter'])
        consulta.set_language('pt')

        # Format each tweet as "@user tweeted: text," (trailing comma kept,
        # matching the original output format).
        linhas = [
            '@%s tweeted: %s' % (t['user']['screen_name'], t['text']) + ','
            for t in api.search_tweets_iterable(consulta)
        ]
        print('Coleta finalizada!')
        #df.to_csv('tweets.txt')
        return pd.DataFrame(linhas)
    except TwitterSearchException as err:
        print(err)
def get_tweets(query):
    """Return up to query['count'] tweets matching query['query'].

    `query` is a dict-like object: 'query' holds space-separated keywords,
    'count' (default 5) caps the number of tweets returned.
    """
    from TwitterSearch import TwitterSearch, TwitterSearchOrder
    import itertools

    order = TwitterSearchOrder()
    order.set_keywords(query.get('query', '').split(' '))
    # tso.set_language('en')
    order.set_include_entities(False)  # plain text only, no entity metadata

    # Credentials come from the Flask application config.
    client = TwitterSearch(
        consumer_key=app.config.get('TWITTER_CONSUMER_KEY'),
        consumer_secret=app.config.get('TWITTER_CONSUMER_SECRET'),
        access_token=app.config.get('TWITTER_ACCESS_TOKEN'),
        access_token_secret=app.config.get('TWITTER_ACCESS_TOKEN_SECRET'),
    )

    limit = int(query.get('count', 5))
    return list(itertools.islice(client.search_tweets_iterable(order), 0, limit))
def getTweetsByWords(authdata, word, limit=100):
    """Collect up to *limit* tweet texts containing *word*.

    Parameters:
        authdata: dict with 'consumer_key', 'consumer_secret',
            'access_token' and 'access_token_secret' entries
        word: single search keyword
        limit: maximum number of tweets to gather (default 100)

    Returns:
        {'status': 'Task Completed', 'result': [tweet texts]}
    """
    order = TwitterSearchOrder()
    order.set_keywords([word])
    order.set_include_entities(False)

    client = TwitterSearch(consumer_key=authdata['consumer_key'],
                           consumer_secret=authdata['consumer_secret'],
                           access_token=authdata['access_token'],
                           access_token_secret=authdata['access_token_secret'])

    texts = []
    for index, status in enumerate(client.search_tweets_iterable(order)):
        if index == limit:
            break
        texts.append(status['text'])
        print(index)  # progress trace, kept from the original
    return {'status': 'Task Completed', 'result': texts}
def count_tweets_of_app(app_name):
    """
    Counts how many distinct users tweeted with both the app_name hashtag
    and COMPETITION_NAME inside the competition date window.

    Args:
        app_name: name of the app whose tweets are to be counted
    Returns:
        number of votes (distinct tweeting users), or -1 if the Twitter
        search failed
    """
    from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
    try:
        tso = TwitterSearchOrder()
        # Both terms must appear in the tweet.
        tso.set_keywords([check_hashtag(app_name), COMPETITION_NAME])

        ts = TwitterSearch(consumer_key=TWITTER_API_KEY,
                           consumer_secret=TWITTER_API_KEY_SECRET,
                           access_token=TWITTER_ACCESS_TOKEN,
                           access_token_secret=TWITTER_ACCESS_TOKEN_SECRET)

        # Use a set so each user is counted at most once (O(1) membership).
        users = set()
        for tweet in ts.search_tweets_iterable(tso):
            user = tweet['user']['id']
            if user not in users:
                # Twitter timestamp format, e.g. "Wed Aug 27 13:08:45 +0000 2008"
                # (see https://dev.twitter.com/overview/api/tweets).
                time_tweet = datetime.datetime.strptime(
                    tweet['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
                # FIX: the original used bitwise `&` between the two
                # comparisons; a chained comparison is the correct boolean
                # idiom (same result here, clearer intent).
                if COMPETITION_START_DATE < time_tweet < COMPETITION_END_DATE:
                    users.add(user)
        return len(users)
    except TwitterSearchException as e:
        # API/auth/rate-limit failure: report it and signal "count
        # unavailable" with -1, as documented above.
        print(e)
        return -1
class TwitterScrape:
    """Methods to gather data from twitter searches"""

    def __init__(self):
        # Login to twitter handle using oauth.
        # SECURITY NOTE(review): credentials are hard-coded; move them into
        # configuration/environment variables and rotate these keys.
        self.twitter = TwitterSearch(
            consumer_key='PYX15cyo7pBYyrny2kXomGf4N',
            consumer_secret='mCMtxofBFLtJv1GVRXeB9w0pw64ObRDPGmIZEGRo3uyl1oPVci',
            access_token='3369817647-TTV9HTaWAIbvrbpJwgXkVQtm0akEMSihl43No3P',
            access_token_secret='WjxjNW8YWmRSL65eIYYhQd4DsBSECO7wKpZpKsfLcR99q'
        )

    def search(self, query, lang='en', n=10**5):
        """
        Search twitter for specified query. Function returns n tweets or as
        many as can be found for that query.

        Parameters:
        query -- Search query (String)
        lang -- Specify language of tweets, optional, default: 'en' (String)
        n -- Number of tweets to return, optional, default: 10**5 (Int)
             (FIX: docstring previously said 10**3, contradicting the
             signature)

        Returns:
        tweets_out -- Pandas series of tweets of length up to n
        """
        # Initialise container
        tweets_out = []

        # Setup twitter search
        tso = TwitterSearchOrder()
        tso.set_keywords([query])
        tso.set_language(lang)
        tso.set_include_entities(False)

        # Begin search; emit a progress marker every 100 tweets.
        sys.stdout.write("Tweet number out of {0}: ".format(n))
        for i, tweet in enumerate(self.twitter.search_tweets_iterable(tso)):
            # Break from loop when n tweets are reached
            if i == n:
                break
            if i % 100 == 0:
                sys.stdout.write('{0} '.format(i))
                sys.stdout.flush()
            tweets_out.append('%s' % (tweet['text']))
        # FIX: bare `print` is a no-op expression in Python 3; call it so the
        # progress line is terminated with a newline.
        print()

        # Return as pandas series as it's easier to work with
        return pd.Series(tweets_out)
def hello_world(keywords):
    """Search Twitter for *keywords* and return a CORS-enabled JSON response.

    Returns {'status': 200, 'results': [...tweet texts...]}; the result list
    is empty when the search fails.
    """
    response = make_response()
    # Allow any origin/header/method (public endpoint).
    response.headers.add("Access-Control-Allow-Origin", "*")
    response.headers.add("Access-Control-Allow-Headers", "*")
    response.headers.add("Access-Control-Allow-Methods", "*")
    # FIX: initialise before the try block — previously, if TwitterSearch
    # setup raised, `tweets` was never bound and the jsonify call below
    # crashed with NameError instead of returning an empty result.
    tweets = []
    try:
        tso = TwitterSearchOrder()
        tso.set_keywords([keywords])
        ts = TwitterSearch(keys)
        for tweet in ts.search_tweets_iterable(tso):
            tweets.append(tweet['text'])
    except TwitterSearchException as e:
        print(e)
    response = jsonify({'status': 200, 'results': tweets})
    return response
class TwitterSearchLastWeekIterator(TwitterSearchIterator):
    """Iterates over tweets matching `search_query` via the TwitterSearch API.

    OAuth credentials are pulled from the SEARCHTWEETS_* environment
    variables at construction time.
    """

    def __init__(self, search_query):
        self.search_query = search_query
        self.library = TwitterSearch(
            consumer_key=os.getenv("SEARCHTWEETS_CONSUMER_KEY"),
            consumer_secret=os.getenv("SEARCHTWEETS_CONSUMER_SECRET"),
            access_token=os.getenv("SEARCHTWEETS_ACCESS_TOKEN"),
            access_token_secret=os.getenv("SEARCHTWEETS_ACCESS_TOKEN_SECRET")
        )
        # The query object knows how to express itself as a search order.
        order = self.search_query.create_twitter_search_order()
        self.iter = iter(self.library.search_tweets_iterable(order))

    def __iter__(self):
        return self

    def __next__(self):
        # Delegate straight to the underlying tweet iterator.
        return next(self.iter)
def get(self, user_handle=None):
    """Return up to `numTweets` (default 20) recent tweets for a handle.

    Responds with status 400 when no handle is given, 404 when the Twitter
    lookup fails, 200 with the tweet list otherwise.
    """
    from TwitterSearch import TwitterSearch, TwitterUserOrder, TwitterSearchException

    if user_handle is None:
        return jsonify({
            'response': [],
            'status': 400,
            'message': 'No handle provided'
        })

    try:
        import itertools

        timeline_order = TwitterUserOrder(user_handle)

        # SECURITY NOTE(review): API keys are hard-coded below; they should
        # be moved into configuration/environment variables and rotated.
        consumer = 'CedAugFXME85jW5MRraKTJFgO'
        consumer_secret = 'RjLOp02iZqQnGM5cOt4bBeFjFHtFyVW09NSH14rVEyPouFvWLs'
        access = '378294925-zdTFn1Gf8rcBzv6gshfjfONZG9ZSc8QFUlZd1YO8'
        access_secret = '0MV9lR9kFdoUkLnKoWgdZCl74vunMAoCR7INC7pQYrSfW'

        client = TwitterSearch(consumer_key=consumer,
                               consumer_secret=consumer_secret,
                               access_token=access,
                               access_token_secret=access_secret)

        # The `numTweets` query parameter caps the result; defaults to 20.
        num_tweets = int(request.args.get('numTweets', 20))
        timeline = client.search_tweets_iterable(timeline_order)
        resolved_tweets = list(itertools.islice(timeline, num_tweets))
        return jsonify({'response': resolved_tweets, 'status': 200})
    except TwitterSearchException as e:
        return jsonify({
            'response': [],
            'status': 404,
            'message': 'There was a problem fetching the data for {}: {}'.format(
                user_handle, e)
        })
def getTweets(politician_id, searchOnlySexistWords):
    """Fetch tweets mentioning a politician, optionally paired with sexist words.

    When searchOnlySexistWords is True, every configured sexist word is
    combined with every name variant and an unbounded iterable of matches is
    returned.  When it is exactly False, the plain names are searched and a
    single page (~100 statuses) is returned.  Returns None when the Twitter
    library raises.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        politician_names = [
            politician.first_name + " " + politician.last_name,
            politician.username
        ]

        tso = TwitterSearchOrder()
        searchTerms = []
        if searchOnlySexistWords:
            # Pair each sexist word with each name variant.
            sexistWords = CONFIG['SEXISTWORDS']
            for word in sexistWords:
                for politician_name in politician_names:
                    searchTerms.append(word + ' ' + politician_name)
        elif searchOnlySexistWords is False:
            # NOTE: only the exact value False selects the plain-name search;
            # other falsy values leave searchTerms empty (behavior preserved).
            searchTerms = politician_names

        tso.set_keywords(searchTerms, or_operator=True)
        tso.set_language("en")
        tso.set_include_entities(False)

        # Re-issue the generated URL with tweet_mode=extended so full,
        # non-truncated tweet bodies come back.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")

        ts = TwitterSearch(consumer_key=CONFIG['CONSUMER_KEY'],
                           consumer_secret=CONFIG['CONSUMER_SECRET'],
                           access_token=CONFIG['ACCESS_TOKEN'],
                           access_token_secret=CONFIG['ACCESS_TOKEN_SECRET'])

        print("**Processing tweets for " +
              str(politician.first_name + " " + politician.last_name) + "**")

        if searchOnlySexistWords:
            return ts.search_tweets_iterable(tso)
        # Single request otherwise — limited to roughly 100 tweets.
        tweets = ts.search_tweets(tso)
        return tweets['content']['statuses']
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
def getStats(self, url, proxy, headers, timeout):
    """Return the summed retweet + favorite counts of tweets containing *url*.

    Parameters:
        url: string that may appear in tweets
        proxy: 'ip:port' proxy string forwarded to the Twitter client
        headers: dict that should contain a user-agent entry
        timeout: maximum seconds to wait for a response (int)

    NOTE(review): `headers` and `timeout` are accepted but never forwarded
    to the TwitterSearch client — confirm whether that is intentional.
    """
    query = TwitterSearchOrder()
    query.set_search_url('q=' + url)
    query.set_result_type(result_type='mixed')
    query.set_include_entities(False)
    query.set_count(100)  # maximum page size per request

    api = TwitterSearch(consumer_key=self.ConsumerKey,
                        consumer_secret=self.ConsumerSecret,
                        access_token=self.AccessTokenKey,
                        access_token_secret=self.AccessTokenSecret,
                        proxy=proxy)

    # Accumulate engagement counts across all matching tweets.
    return sum(t['retweet_count'] + t['favorite_count']
               for t in api.search_tweets_iterable(query))
def search(self):
    """Run the configured search, appending each tweet's text to self.data
    and persisting it via self.save_line.

    Twitter API errors are caught and printed rather than propagated.
    """
    try:
        tso = TwitterSearchOrder()
        # FIX: set_keywords expects the list itself. The original unpacked
        # it (`*self.search_terms`), which passed the second term into the
        # `or_operator` parameter and raised TypeError for three or more
        # terms.
        tso.set_keywords(self.search_terms)
        tso.set_include_entities(False)
        tso.set_count(100)  # maximum page size per request

        # SECURITY NOTE(review): hard-coded credentials — move to config.
        ts = TwitterSearch(
            consumer_key='aOUVcCWLIYEbUvHW5dLjVc7Gf',
            consumer_secret='8qb3LTAHbj43J40Rxm0RMLAOaP4QoEHfFVGTeJ3S6iUmSBq6JJ',
            access_token='4251433696-ulZx8dJ3QZE95ds0PhXNldeKFhjhBUoGSuGycSE',
            access_token_secret='wx65NQaBHHgwC4xLOgRxFSs4kWWzkg09KkgNkAKHZryks'
        )
        for tweet in ts.search_tweets_iterable(tso):
            self.data.append(tweet['text'])
            self.save_line(tweet['text'])
    except TwitterSearchException as exception:
        # Report API/auth/rate-limit failures without aborting the caller.
        print(exception)
def getTweets(politician_id):
    """Search recent English tweets pairing sexist words with a politician's
    name variants.

    Returns a TwitterSearch iterable of matching tweets, or None (after
    logging) when the search library raises.
    """
    try:
        politician = Politician.objects.get(id=politician_id)
        politician_names = [politician.first_name + " " + politician.last_name,
                            politician.last_name,
                            politician.username]
        print("Getting Tweets for " +
              str(politician.first_name + " " + politician.last_name))

        tso = TwitterSearchOrder()
        sexistWords = ['bitch', 'skank', 'rape']
        searchTerms = []
        # FIX: the inner loop variable was previously named `politician`,
        # shadowing (and clobbering) the Politician model instance fetched
        # above; renamed to `name`.
        for word in sexistWords:
            for name in politician_names:
                searchTerms.append(word + ' ' + name)
        tso.set_keywords(searchTerms, or_operator=True)
        print(searchTerms)
        tso.set_language("en")
        tso.set_include_entities(False)

        # Re-issue the generated URL with tweet_mode=extended so full,
        # non-truncated tweet bodies come back.
        querystr = tso.create_search_url()
        tso.set_search_url(querystr + "&tweet_mode=extended")

        # Environment variables win; CONFIG provides the fallback values.
        ts = TwitterSearch(
            consumer_key=os.environ.get('CONSUMER_KEY', CONFIG['CONSUMER_KEY']),
            consumer_secret=os.environ.get('CONSUMER_SECRET', CONFIG['CONSUMER_SECRET']),
            access_token=os.environ.get('ACCESS_TOKEN', CONFIG['ACCESS_TOKEN']),
            access_token_secret=os.environ.get('ACCESS_TOKEN_SECRET', CONFIG['ACCESS_TOKEN_SECRET'])
        )
        return ts.search_tweets_iterable(tso)
    except TwitterSearchException as e:
        logging.exception("Unable to get new tweets because of" + str(e))
tso.set_since_id(latest_id_int) print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.' else: print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.' # Create a TwitterSearch object with our secret tokens ts = TwitterSearch( consumer_key = config.get('Twitter', 'consumer_key'), consumer_secret = config.get('Twitter', 'consumer_secret'), access_token = config.get('Twitter', 'access_token'), access_token_secret = config.get('Twitter', 'access_token_secret') ) # Perform the search twitter_search = ts.search_tweets_iterable(tso) # Start the insert count variable db_inserts = 0 # this is where the fun actually starts :) try: for tweet in twitter_search: if db_inserts < count: mentions_list = [] hashtags_list = [] # Create the caliper_tweet object caliper_tweet = { "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent", "type": "MessagingEvent", "startedAtTime": "",
def Tweets():
    """Collect recent tweets per company — a hashtag search plus the
    company's own timeline — into per-company DataFrames.

    Reads the module-level MainDF (assumes 'company', 'twitter' and
    'hashtag' columns — TODO confirm), writes DataFrames into the
    module-level `tweets_datasets` dict and appends processed company names
    to `tw_current_companies`.  Errors from the Twitter library are printed.
    """
    try:
        max_feeds = 10  # cap on timeline tweets kept per company
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.set_include_entities(
            False)  # and don't give us all those entity information
        tso.set_until(new_date)  # only tweets up to this module-level date
        # Ask the API for full-length (non-truncated) tweet bodies.
        tso.arguments.update({'tweet_mode': 'extended'})
        tso.arguments.update({'truncated': 'False'})
        # Client with empty credential placeholders, routed via a proxy.
        ts = TwitterSearch(consumer_key='',
                           consumer_secret='',
                           access_token='',
                           access_token_secret='',
                           proxy='http://proxy_address')
        for c in range(len(MainDF)):
            count = 0
            #kw=[MainDF['twitter'][c]]
            #for h in MainDF['hashtag'][c]:
            #    kw.append(h)
            tso.set_keywords(MainDF['hashtag'][c])
            tweets_list = []
            # Separate order for the company's own timeline.
            tuo = TwitterUserOrder(MainDF['twitter'][c])
            # tuo.set_language('en')
            tuo.set_include_entities(
                False)  # and don't give us all those entity information
            # tuo.set_until(days_ago)
            # tuo.set_count(15)
            tuo.arguments.update({'tweet_mode': 'extended'})
            tuo.arguments.update({'truncated': 'False'})
            # Hashtag search: retweets are skipped, originals collected.
            for tweet in ts.search_tweets_iterable(tso):
                if 'retweeted_status' in tweet:
                    None
                    #tweets_list.append([tweet['user']['screen_name'],tweet['retweeted_status']['full_text'],'Retweet of ' + tweet['retweeted_status']['user']['screen_name']])
                else:
                    # Extract any URLs from the tweet body.
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    #print(tweet)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
            # Timeline: keep English tweets only, up to max_feeds of them.
            for tweet in ts.search_tweets_iterable(tuo):
                if tweet['lang'] != 'en':
                    #print(tweet)
                    None
                else:
                    # print(tweet)
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
                    count = count + 1
                    if count == max_feeds:
                        break
            if tweets_list != []:
                # Wrap this company's rows in a DataFrame and label it.
                tweets_datasets[MainDF['company'][c]] = pd.DataFrame(
                    tweets_list)
                tweets_datasets[MainDF['company'][c]].columns = [
                    'Company', 'Source/User', 'Title/Tweet', 'Date', 'Link'
                ]
                tweets_datasets[MainDF['company'][c]].insert(
                    0, 'Category', 'Twitter')
                # Normalise the created_at strings to plain date objects.
                for i in range(
                        len(tweets_datasets[MainDF['company'][c]]['Date'])):
                    tweets_datasets[MainDF['company'][c]]['Date'][i] = parse(
                        tweets_datasets[MainDF['company'][c]]['Date'][i])
                    tweets_datasets[
                        MainDF['company'][c]]['Date'][i] = tweets_datasets[
                            MainDF['company'][c]]['Date'][i].date()
                #print(datasets[companies_names[count]])
                tw_current_companies.append(MainDF['company'][c])
            else:
                None
                #tweets_list.append()
        #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)
document_ids = tuple(filter(lambda id_: not id_.startswith('_'), database)) if len(document_ids) > 0: # If we already have imported tweets, we should continue with the oldest # tweet we know and work our way to older tweets from there. # We do that by setting the max_id query parameter to the oldest tweet # we know. oldest_id = min(document_ids) twitter_query.set_max_id(int(oldest_id)) print('Continuing initial import from tweet {}'.format(oldest_id)) else: print('Starting initial import on fresh database.') try: # Start making requests to the twitter API by searching tweets with our # twitter query. twitter_result_stream = twitter_connection.search_tweets_iterable( twitter_query) except TwitterSearchException as exc: if exc.code == 429: # Twitter has responded with a "429 Too Many Requests" error. # That means we made more requests than twitter allows us to do. # See: https://developer.twitter.com/en/docs/basics/rate-limiting # We now wait for 100 seconds and then try again until we can make # requests again. # We use tqdm for displaying the sleep progress. for second in tqdm(range(100), 'Sleep because of rate limit'): sleep(1) # sleep for 1 second continue else: # If it is another exception, re-raise the exception so that it is # displayed and aborts the import. raise
def collect_tweets(keyword, count, force=False):
    """Collect up to `count` new tweets matching `keyword`, score each with
    AlchemyAPI sentiment, and insert Caliper-shaped documents into a MongoDB
    collection named after the keyword.

    Python 2 code (print statements, `unicode`, ConfigParser).

    Parameters:
    - keyword: search term; a leading '.' stands in for '#' on the CLI
    - count: maximum number of documents to insert this run
    - force: when True, continue with placeholder sentiment values after the
      AlchemyAPI daily limit is exceeded instead of raising
    """
    from TwitterSearch import TwitterSearch
    from TwitterSearch import TwitterSearchOrder
    import pymongo
    from dateutil.parser import parse
    from alchemyapi import AlchemyAPI
    import ConfigParser

    # (Historic sys.argv parsing for keyword/count/-f removed to comments in
    # the original; arguments are now taken as function parameters.)

    # Read the config file for config variables
    config = ConfigParser.RawConfigParser()
    config.read('config.cfg')
    mongo_url = config.get('Mongo', 'db_url')

    # Connect to the Mongo database using MongoClient
    client = pymongo.MongoClient(mongo_url)
    db = client.get_default_database()

    # Access/create the collection based on the command line argument
    tweets = db[keyword]

    # Generate the alchemyapi variable
    alchemyapi = AlchemyAPI()

    # To accommodate for hashtags the user can substitute a . for the # on
    # the command line; restore it here.
    # NOTE(review): `is "."` compares identity, not equality — it only works
    # via CPython string interning and should be `== "."`.
    if keyword[0] is ".":
        keyword = keyword.replace('.', '#')

    # Avoid duplicate work: if the collection already has documents, find
    # the latest stored tweet id and use it as since_id below.
    db_count = tweets.count()
    # NOTE(review): `is not 0` relies on CPython small-int caching; should
    # be `!= 0`.
    if db_count is not 0:
        latest_id = tweets.find( {}, { 'object.tweet_id':1 } ).sort("startedAtTime").limit(1)
        latest_id_str = latest_id[db_count-1]['object']['tweet_id']
        latest_id_int = int(latest_id_str)
        print 'Count of documents in the ' + keyword + ' collection is not 0. It is ' + str(db_count) + '. Mongo is now identifying the latest tweet ID to append as a parameter to the API call.'
    # If there are no documents in the collection, no queries are done, and
    # the since_id is left out of the API call.
    else:
        print 'The Mongo collection ' + keyword + ' is empty. The script will now collect all tweets.'

    # create a TwitterSearchOrder object
    tso = TwitterSearchOrder()
    # let's define all words we would like to have a look for
    tso.set_keywords([keyword])
    # Select language
    tso.set_language('en')
    # Include Entity information
    tso.set_include_entities(True)

    if db_count is not 0:
        tso.set_since_id(latest_id_int)
        print 'Since the document count in the ' + keyword + ' collection is above 0, the since_id uses the parameter of the latest tweet so that only new tweets are collected.'
    else:
        print 'No documents exist in the ' + keyword + ' collection right now so the since_id parameter will be empty and all tweets will be collected.'

    # Create a TwitterSearch object with our secret tokens
    ts = TwitterSearch(
        consumer_key = config.get('Twitter', 'consumer_key'),
        consumer_secret = config.get('Twitter', 'consumer_secret'),
        access_token = config.get('Twitter', 'access_token'),
        access_token_secret = config.get('Twitter', 'access_token_secret')
    )

    # Perform the search
    twitter_search = ts.search_tweets_iterable(tso)

    # Start the insert count variable
    db_inserts = 0

    # this is where the fun actually starts :)
    try:
        for tweet in twitter_search:
            if db_inserts < count:
                mentions_list = []
                hashtags_list = []
                # Create the caliper_tweet object (Caliper MessagingEvent
                # skeleton; fields are filled in below).
                caliper_tweet = {
                    "context": "http://purl.imsglobal.org/ctx/caliper/v1/MessagingEvent",
                    "type": "MessagingEvent",
                    "startedAtTime": "",
                    ## Can be used to query Twitter API for user information
                    "actor": "",
                    "verb": "tweetSent",
                    "object": {
                        "type": "MessagingEvent",
                        "tweet_id": "",
                        "tweet_uri": "",
                        "subtype": "tweet",
                        ## "to" is derived below from in_reply_to_user_id_str:
                        ## when not null it becomes a twitter intent user URI
                        "to": "",
                        "author": {
                            "author_uri": "",
                            "author_alias": "",
                            "author_name": "",
                        },
                        "text": "",
                        "sentiment": {
                            "type": "",
                            "score": "",
                            "color": ""
                        },
                        "parent": "",
                        ## "user_mentions" is an array of caliper IDs from the
                        ## user_mentions objects array
                        "user_mentions": [],
                        ## "hashtags" is an array of the hashtag texts included
                        ## in the tweet entities
                        "hashtags": []
                    }
                }

                # Set the re-usable variables
                tweet_text = tweet['text']

                ## AlchemyAPI Sentiment Analysis
                tweet_sentiment = ''
                response = alchemyapi.sentiment('text', tweet_text)
                if 'docSentiment' in response.keys():
                    if 'score' in response['docSentiment']:
                        tweet_sentiment_score = response['docSentiment']['score']
                        tweet_sentiment_score = float(tweet_sentiment_score)
                        tweet_sentiment_score = round(tweet_sentiment_score, 2)
                    else:
                        # Neutral sentiment carries no score field.
                        tweet_sentiment_score = 0
                    tweet_sentiment_type = response['docSentiment']['type']
                    tweet_sentiment_score_a = abs(tweet_sentiment_score)
                    # Green for positive, red for negative; alpha channel
                    # scales with the score magnitude.
                    if (tweet_sentiment_score) > 0:
                        tweet_sentiment_color = "rgba(0,255,0," + str(tweet_sentiment_score_a) + ")"
                    else:
                        tweet_sentiment_color = "rgba(255,0,0," + str(tweet_sentiment_score_a) + ")"
                elif force == True:
                    # API limit reached but the caller asked to keep going.
                    print 'Force option set to true. The tweet_sentiment object will be set with API Limit Exceeded values.'
                    tweet_sentiment_type = 'API Limit Exceeded'
                    tweet_sentiment_score = 0
                    tweet_sentiment_color = 'rgba(0,0,0,0)'
                else:
                    e_alchemy_api_limit = 'Alchemy API daily limit exceeded. Retry search with force=True to continue'
                    raise Exception(e_alchemy_api_limit)

                # Fill in the Caliper document from the raw tweet.
                ds = tweet['created_at']
                tweet_date = parse(ds)
                caliper_tweet['startedAtTime'] = tweet_date
                caliper_tweet['actor'] = 'student:' + tweet['user']['screen_name']
                caliper_tweet['object']['tweet_uri'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['id_str']
                caliper_tweet['object']['tweet_id'] = tweet['id_str']
                # "to"/"parent" depend on whether this tweet is a reply.
                if tweet['in_reply_to_user_id_str'] is None:
                    caliper_tweet['object']['to'] = 'NoReply'
                    caliper_tweet['object']['parent'] = 'NoReply'
                else:
                    caliper_tweet['object']['to'] = 'https://twitter.com/intent/user?user_id=' + tweet['in_reply_to_user_id_str']
                    if tweet['in_reply_to_status_id_str'] is None:
                        caliper_tweet['object']['parent'] = 'None'
                    else:
                        caliper_tweet['object']['parent'] = 'https://twitter.com/' + tweet['user']['screen_name'] + '/status/' + tweet['in_reply_to_status_id_str']
                caliper_tweet['object']['author']['author_uri'] = 'https://twitter.com/intent/user?user_id=' + tweet['user']['id_str']
                caliper_tweet['object']['author']['author_alias'] = tweet['user']['screen_name']
                caliper_tweet['object']['author']['author_name'] = tweet['user']['name']
                caliper_tweet['object']['text'] = unicode(tweet['text'])
                caliper_tweet['object']['sentiment']['type'] = tweet_sentiment_type
                caliper_tweet['object']['sentiment']['score'] = tweet_sentiment_score
                caliper_tweet['object']['sentiment']['color'] = tweet_sentiment_color
                # Collect hashtag texts and mentioned user ids from entities.
                for x in list(tweet['entities']['hashtags']):
                    hashtag = x['text']
                    hashtags_list.append(hashtag)
                for x in list(tweet['entities']['user_mentions']):
                    mention = x['id_str']
                    mentions_list.append(mention)
                caliper_tweet['object']['user_mentions'] = mentions_list
                caliper_tweet['object']['hashtags'] = hashtags_list
                tweets.insert(caliper_tweet)
                db_inserts = db_inserts + 1
            else:
                # Requested insert count reached; stop iterating.
                raise StopIteration
    except StopIteration:
        print str(db_inserts) + " inserts made in the " + keyword + " collection."
default=False, help='show extra output') args = parser.parse_args() # create a TwitterUserOrder using the command line arg as the query tuo = TwitterUserOrder(args.username) # start an index for counting the processed tweets index = 0 # start a summary of sentiment scores for later averaging sum_sentiment = 0 try: # ask Twitter for the timeline for tweet in islice(ts.search_tweets_iterable(tuo), 0, args.limit): index = index + 1 # scrub usernames, special characters and URLs from tweet cleanTweet = re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet['text']) # sentiment analysis using TextBlob analysis = TextBlob(cleanTweet) # define human friendly sentiment scores if analysis.sentiment.polarity > 0: score = positive elif analysis.sentiment.polarity == 0: score = neutral
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

# Authenticated client built from the credentials module.
ts = TwitterSearch(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)

try:
    # Match tweets mentioning either survey platform.
    tso = TwitterSearchOrder()
    tso.set_keywords(['surveymonkey', 'docs.google.com/forms'], or_operator=True)
    for tweet in ts.search_tweets_iterable(tso):
        print('@%s tweeted: %s' % (tweet['user']['screen_name'], tweet['text']))
except TwitterSearchException as e:
    # Report API/auth/rate-limit failures.
    print(e)
from TwitterSearch import TwitterSearchOrder, TwitterSearch, TwitterSearchException
from CREDS import *

# Client authenticated with the tokens exported by CREDS.
ts = TwitterSearch(
    consumer_key=TWITTER_CONSUMER_KEY,
    consumer_secret=TWITTER_CONSUMER_SECRET,
    access_token=TWITTER_ACCESS_TOKEN,
    access_token_secret=TWITTER_ACCESS_TOKEN_SECRET,
)

try:
    tso = TwitterSearchOrder()
    # OR-search: either keyword is enough for a match.
    tso.set_keywords(['surveymonkey', 'docs.google.com/forms'], or_operator=True)
    for tweet in ts.search_tweets_iterable(tso):
        author = tweet['user']['screen_name']
        body = tweet['text']
        print('@%s tweeted: %s' % (author, body))
except TwitterSearchException as e:
    print(e)
place = raw_input("Enter a twitter handle: ") tuo = TwitterUserOrder(place) # create a TwitterUserOrder # it's about time to create TwitterSearch object again ts = TwitterSearch( consumer_key='jP53etLOQHrdCtMc4j2Djas2z', consumer_secret='9UmpzmT1IPF6JuNzODHOyXZU19Vv1C0eYOQraQLwY04jAMGpu4', access_token='746046118652416000-BZC8oHZZ75dJe8Q8fGlMigNvKy6kVwK', access_token_secret='Nfl6UpuUUdvSy60tN6p7l3l1W0GOGKpQoIbqZg78cdrtd') def my_callback_closure( current_ts_instance ): # accepts ONE argument: an instance of TwitterSearch queries, tweets_seen = current_ts_instance.get_statistics() # if queries > 0 and (queries % 60) == 0: # trigger delay every 5th query # time.sleep(30) # sleep for 60 seconds tweetArray = [] # start asking Twitter about the timeline for tweet in ts.search_tweets_iterable(tuo, callback=my_callback_closure): # tweetArray.append(tweet['text']) # if 'accessible' in tweet['text']: print tweet['text'] print( json.dumps(alchemy_language.emotion(text=tweet['text'], language='english'), indent=2)) except TwitterSearchException as e: # catch all those ugly errors print(e)
def Tweets():
    """Collect recent tweets per company — a hashtag search plus the
    company's own timeline — into per-company DataFrames.

    Reads the module-level MainDF (assumes 'company', 'twitter' and
    'hashtag' columns — TODO confirm), writes DataFrames into the
    module-level `tweets_datasets` dict and appends processed company names
    to `tw_current_companies`.  Errors from the Twitter library are printed.
    """
    try:
        max_feeds = 10  # cap on timeline tweets kept per company
        tso = TwitterSearchOrder()  # create a TwitterSearchOrder object
        tso.set_language('en')
        tso.set_include_entities(
            False)  # and don't give us all those entity information
        tso.set_until(new_date)  # only tweets up to this module-level date
        # Ask the API for full-length (non-truncated) tweet bodies.
        tso.arguments.update({'tweet_mode': 'extended'})
        tso.arguments.update({'truncated': 'False'})
        # SECURITY NOTE(review): hard-coded credentials — move to config.
        ts = TwitterSearch(
            consumer_key='DMHjSht5U0UqNUsAWpZH9DXok',
            consumer_secret=
            'olCjsx8LltiHxEiPHWafExoibDuu4eZT48udXTeSYcQbLQ3juB',
            access_token='1170976252213125121-ftEg9MzF9siFHUmcUkV6zzT7mQV9Db',
            access_token_secret='eNA62T8Ig40Iz1wmKf6baDGHqY3Wh9kxzu9oaOQdGE9h8',
        )
        for c in range(len(MainDF)):
            count = 0
            #kw=[MainDF['twitter'][c]]
            #for h in MainDF['hashtag'][c]:
            #    kw.append(h)
            tso.set_keywords(MainDF['hashtag'][c])
            tweets_list = []
            # Separate order for the company's own timeline.
            tuo = TwitterUserOrder(MainDF['twitter'][c])
            # tuo.set_language('en')
            tuo.set_include_entities(
                False)  # and don't give us all those entity information
            # tuo.set_until(days_ago)
            # tuo.set_count(15)
            tuo.arguments.update({'tweet_mode': 'extended'})
            tuo.arguments.update({'truncated': 'False'})
            # Hashtag search: retweets are skipped, originals collected.
            for tweet in ts.search_tweets_iterable(tso):
                if 'retweeted_status' in tweet:
                    None
                    #tweets_list.append([tweet['user']['screen_name'],tweet['retweeted_status']['full_text'],'Retweet of ' + tweet['retweeted_status']['user']['screen_name']])
                else:
                    # Extract any URLs from the tweet body.
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    #print(tweet)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
            # Timeline: keep English tweets only, up to max_feeds of them.
            for tweet in ts.search_tweets_iterable(tuo):
                if tweet['lang'] != 'en':
                    #print(tweet)
                    None
                else:
                    # print(tweet)
                    links = Find(tweet['full_text'])
                    links = ', '.join(link for link in links)
                    tweets_list.append([
                        MainDF['company'][c], tweet['user']['screen_name'],
                        tweet['full_text'], tweet['created_at'], links
                    ])
                    count = count + 1
                    if count == max_feeds:
                        break
            if tweets_list != []:
                # Wrap this company's rows in a DataFrame and label it.
                tweets_datasets[MainDF['company'][c]] = pd.DataFrame(
                    tweets_list)
                tweets_datasets[MainDF['company'][c]].columns = [
                    'Company', 'Source/User', 'Title/Tweet', 'Date', 'Link'
                ]
                tweets_datasets[MainDF['company'][c]].insert(
                    0, 'Category', 'Twitter')
                # Normalise the created_at strings to plain date objects.
                for i in range(
                        len(tweets_datasets[MainDF['company'][c]]['Date'])):
                    tweets_datasets[MainDF['company'][c]]['Date'][i] = parse(
                        tweets_datasets[MainDF['company'][c]]['Date'][i])
                    tweets_datasets[
                        MainDF['company'][c]]['Date'][i] = tweets_datasets[
                            MainDF['company'][c]]['Date'][i].date()
                #print(datasets[companies_names[count]])
                tw_current_companies.append(MainDF['company'][c])
            else:
                None
                #tweets_list.append()
        #print( '@%s tweeted: %s' % ( tweet['user']['screen_name'], tweet['text'] ) )
    except TwitterSearchException as e:
        # take care of all those ugly errors if there are some
        print(e)