def sampletoscreen_demo(limit=20):
    """
    Sample from the Streaming API and send output to terminal.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=limit))
    client.sample()
def tracktoscreen_demo(track="taylor swift", limit=10):
    """
    Track keywords from the public Streaming API and send output to terminal.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=limit))
    client.filter(track=track)
def followtoscreen_demo(limit=10):
    """
    Using the Streaming API, select just the tweets from a specified list
    of user IDs.

    This will only give results in a reasonable time if the users in
    question produce a high volume of tweets, and may show some delay even
    so.
    """
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=limit))
    client.statuses.filter(follow=USERIDS)
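# USERIDS is defined elsewhere in the demo module; a minimal placeholder,
# assuming it is a list of Twitter user-ID strings. The values below are
# purely hypothetical and only illustrate the expected shape.
USERIDS = ['123456789', '987654321']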
def senti_score_daily(keyword, client, classifier, twt_num, start_time, days, verb=0):
    score_all = np.zeros((twt_num, days))
    year = start_time[0]
    month = start_time[1]
    day = start_time[2]
    origin = datetime.date(year, month, day)
    for i in range(days):
        start_t = origin + datetime.timedelta(days=i)
        end_t = origin + datetime.timedelta(days=i + 1)
        date1 = start_t.timetuple()[:6]
        date2 = end_t.timetuple()[:6]
        # if the tweets are not cached yet, fetch them from the server and
        # save them; otherwise just load the cached file
        filename = keyword + '_' + str(start_t) + '_' + str(end_t) + '.pickle'
        path = os.path.join(os.path.dirname(__file__), os.pardir, 'data', 'tweets')
        if os.path.isfile(os.path.join(path, filename)):
            with open(os.path.join(path, filename), 'rb') as f:
                tweets = pickle.load(f)
        else:
            client.register(
                TweetViewer(limit=twt_num, lower_date_limit=date1, upper_date_limit=date2))
            tweets_gen = client.search_tweets(keywords=keyword, limit=twt_num, lang='en')
            tweets = [t['text'] for t in tweets_gen]
            with open(os.path.join(path, filename), 'wb') as f:
                pickle.dump(tweets, f)
        score = classifier.test(tweets)
        if len(score[:, 0]) <= twt_num:
            score_all[0:len(score[:, 0]), i] = score[:, 0]
        else:
            score_all[0:twt_num, i] = score[:twt_num, 0]
        if verb == 1:
            print(keyword + ' : ' + str(start_t) + ' to ' + str(end_t)
                  + ' score=' + str(np.mean(score_all[:, i], axis=0)))
    return score_all
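# A minimal usage sketch (not from the original script), assuming the Query
# client from nltk.twitter and the project-local NBClassifier imported further
# below; the keyword, dates, and the assumption that NBClassifier() takes no
# constructor arguments and exposes test() are all illustrative.
oauth = credsfromfile()
client = Query(**oauth)
classifier = NBClassifier()  # assumed no-argument constructor
scores = senti_score_daily('hearthstone', client, classifier,
                           twt_num=100, start_time=(2015, 6, 1), days=10, verb=1)
print(np.mean(scores, axis=0))  # one mean sentiment score per day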
def limit_by_time_demo(keywords="nltk"):
    """
    Query the REST API for Tweets about NLTK since yesterday and send
    the output to terminal.

    This example makes the assumption that there are sufficient Tweets
    since yesterday for the date to be an effective cut-off.
    """
    date = yesterday()
    dt_date = datetime.datetime(*date)
    oauth = credsfromfile()
    client = Query(**oauth)
    client.register(TweetViewer(limit=100, lower_date_limit=date))

    print(f"Cutoff date: {dt_date}\n")

    for tweet in client.search_tweets(keywords=keywords):
        print("{} ".format(tweet["created_at"]), end="")
        client.handler.handle(tweet)
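# yesterday() is not shown in this excerpt; a minimal sketch of a compatible
# helper, assuming the demo expects a (year, month, day, hour, minute, second)
# tuple that both datetime.datetime(*date) and lower_date_limit accept.
def yesterday():
    date = datetime.datetime.now() - datetime.timedelta(days=1)
    return date.timetuple()[:6]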
def senti_score_time(keyword, client, classifier, twt_num, start_time, step, step_num, verb=0):
    score_all = np.zeros((twt_num, step_num))
    year = start_time[0]
    month = start_time[1]
    day = start_time[2]
    hour = start_time[3]
    minute = start_time[4]
    origin = datetime.datetime(year, month, day, hour, minute)
    for i in range(step_num):
        start_t = origin + datetime.timedelta(minutes=step * i)
        end_t = origin + datetime.timedelta(minutes=step * (i + 1))
        date1 = start_t.timetuple()[:6]
        date2 = end_t.timetuple()[:6]
        # don't cache tweets to disk in this case
        client.register(
            TweetViewer(limit=twt_num, lower_date_limit=date1, upper_date_limit=date2))
        tweets_gen = client.search_tweets(keywords=keyword, limit=twt_num, lang='en')
        tweets = [t['text'] for t in tweets_gen]
        score = classifier.test(tweets)
        score_all[0:len(score[:, 0]), i] = score[:, 0]
        if verb == 1:
            print(keyword + ' : ' + str(start_t) + ' to ' + str(end_t)
                  + ' score=' + str(np.mean(score_all[:, i], axis=0)))
    return score_all
from nltk.twitter import Query, credsfromfile, TweetViewer
import process_twt
from NBClassifier import NBClassifier
from SCClassifier import SCClassifier
from BGClassifier import BGClassifier
from nltk.corpus import twitter_samples, TwitterCorpusReader
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np

# settings
oauth = credsfromfile()
client = Query(**oauth)
twtNum = 10
client.register(TweetViewer(limit=twtNum))
tweets_gen = client.search_tweets(keywords='hearthstone', lang='en')
tweets = []

# preprocess the raw tweet text and drop duplicates
slangdict = process_twt.get_slang_dict()
twt_list = []
for t in tweets_gen:
    twt_list.append(process_twt.preprocess(t['text'], slangdict=slangdict))
twt_list = list(set(twt_list))
for t in twt_list[:twtNum]:
    print(t)

fileIds = twitter_samples.fileids()
root = twitter_samples.root

# read tweet data from corpus
from io import StringIO
import os

import pandas as pd

from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
from nltk.twitter.common import json2csv


class TwitterAPI():
    # These statements run once, when the class is defined: authenticate and
    # stream a small sample of tweets to the terminal.
    oauth = credsfromfile()
    client = Streamer(**oauth)
    client.register(TweetViewer(limit=10))
    client.sample()
    path = BASE_DIR  # BASE_DIR is assumed to be defined elsewhere in the project

    def __init__(self, to_screen=True, follow=None, keywords='', limit=10):
        self.follow = follow
        self.keywords = keywords
        self.limit = limit
        tw = Twitter().tweets(to_screen=to_screen, follow=follow,
                              keywords=keywords, limit=limit)

    def get_twitter(self, keywords, limit=10):
        client = Query(**self.oauth)
        tweets = client.search_tweets(keywords=keywords, limit=limit)
        return next(tweets)

    def get_users(self, *userids):
        # look up users by user ID
        client = Query(**self.oauth)
        user_info = client.user_info_from_id(userids)
        users = []
        for user in user_info:
            name, followers, following = (user['name'],
                                          user['followers_count'],
                                          user['friends_count'])
            users.append(user)
            print(f'{name} {followers} {following}\n')
        return users

    def save_tweets_file(self):
        client = Streamer(**self.oauth)
        client.register(TweetWriter(limit=100, subdir='twitter_samples_files'))
        client.statuses.sample()

    def get_tweet_JsonFiles(self, json_file2=None):
        if json_file2 is None:
            all_tweet_samples = twitter_samples.fileids()
            json_file = all_tweet_samples[2]  # one of the bundled JSON files
            return twitter_samples.strings(json_file)
        return json_file2

    def tokenize_tweets(self, string_tweet):
        # twitter_samples.tokenized() expects a corpus fileid
        return twitter_samples.tokenized(string_tweet)

    def convert_csv_tweet_file(self, input_file, args=[
            'created_at', 'favorite_count', 'id', 'in_reply_to_status_id',
            'in_reply_to_user_id', 'retweet_count', 'text', 'truncated', 'user.id']):
        outfile = os.path.join(self.path, 'tweets_text.csv')
        with open(input_file) as infile:
            json2csv(infile, outfile, args)
        with open(outfile) as csvfile:
            return csvfile.readlines()

    def read_csv_tweets(self, filepath, *args):
        # header=0 uses the first row (written by json2csv) as column names
        return pd.read_csv(filepath, header=0, index_col=1, encoding='utf-8').head()

    def get_tweet_by_id(self, filepath, tw_id):
        ids = StringIO(str(tw_id))
        client = Query(**self.oauth)
        hydrated = client.expand_tweetids(ids)
        tw = self.read_csv_tweets(filepath)
        for tweet in hydrated:
            yield tw.loc[tw['id'] == tweet['id']]['text']
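# Hypothetical usage of the class above; the keyword is a placeholder and
# valid Twitter credentials are assumed to be available via credsfromfile().
api = TwitterAPI(to_screen=True, follow=None, keywords='nltk', limit=10)
print(api.get_twitter('nltk')['text'])  # one tweet matching the keyword
api.save_tweets_file()                  # write a sample of live tweets to disk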
from nltk.twitter import Query, credsfromfile, TweetViewer
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
import sys

if len(sys.argv) < 4:
    print('Usage:', sys.argv[0],
          'twitter_username max_tweets_to_search max_top_words_to_print lemmatize(optional)')
    quit()

# capture the output of TweetViewer to a file for later processing
sys.stdout = open('tweets.txt', 'w')
oauth = credsfromfile()
client = Query(**oauth)
client.register(TweetViewer(limit=int(sys.argv[2])))
client.user_tweets(sys.argv[1], int(sys.argv[2]))
# give back control to stdout
sys.stdout = sys.__stdout__

lemmatizer = WordNetLemmatizer()
if len(sys.argv) > 4 and sys.argv[4].lower() == 'lemmatize':
    lemmatize = True
else:
    lemmatize = False


def text_cleaner(documents):
    text_cleaned = []
    for document in documents: