def find_matching_tweets(num_tweets=100, fname="matching_tweets.csv", shownum=50):
    """Stream tweets matching the keyword "Trump" and save them to a CSV file.

    Collects ``num_tweets`` tweets from the Streaming API filtered on the
    (case-insensitive) track keyword "trump", writes their ``id`` and ``text``
    fields to ``fname``, and returns the first ``shownum`` rows as a pandas
    DataFrame. Does not remove retweets.

    :param num_tweets: number of tweets to collect before the writer stops.
    :param fname: path of the CSV file to write.
    :param shownum: number of rows of the resulting DataFrame to return.
    :return: ``pandas.DataFrame`` with the first ``shownum`` collected tweets.
    """
    oauth = credsfromfile()
    # Create and register a streamer that stops after num_tweets tweets.
    client = Streamer(**oauth)
    writer = TweetWriter(limit=num_tweets)
    client.register(writer)
    # Get the name of the newly-created json file before streaming starts.
    input_file = writer.timestamped_file()
    client.filter(track="trump")  # case-insensitive keyword match
    # BUG FIX: the original also called client.sample() here, which would have
    # opened a second, UNFILTERED stream after filter() had already collected
    # its quota — contradicting the docstring. The call was removed.
    with open(input_file) as fp:
        # Extract only these two fields for now.
        json2csv(fp, fname, ['id', 'text'])
    # Pretty print using pandas.
    tweets = pd.read_csv(fname, encoding="utf8")
    return tweets.head(shownum)
def senti_score_realtime(keyword, client, classifier, twt_num, step, step_num, verb=0):
    """Compute sentiment scores for a keyword over successive time windows.

    For each of ``step_num`` windows of ``step`` minutes, collects up to
    ``twt_num`` tweets matching ``keyword`` from the streaming ``client``,
    scores their text with ``classifier.test``, and stores the first score
    column per window. Sleeps out the remainder of each window before the
    next iteration.

    :param keyword: track keyword passed to ``client.filter``.
    :param client: a registered streaming client (e.g. ``Streamer``).
    :param classifier: object whose ``test(tweets)`` returns a 2-D score array.
    :param twt_num: max tweets per window (also the score matrix row count).
    :param step: window length in minutes.
    :param step_num: number of windows (score matrix column count).
    :param verb: if 1, print the mean score of each window.
    :return: ``(twt_num, step_num)`` numpy array of scores (zero-padded).
    """
    score_all = np.zeros((twt_num, step_num))
    for i in range(step_num):
        origin = datetime.datetime.today()
        end_t = origin + datetime.timedelta(minutes=step)
        # Don't store files in this case (writer used only for its limit).
        client.register(TweetWriter(limit=twt_num))
        tweets_gen = client.filter(track=keyword)
        tweets = [t['text'] for t in tweets_gen]
        score = classifier.test(tweets)
        # Fewer than twt_num tweets may arrive; fill only the rows we have.
        score_all[0:len(score[:, 0]), i] = score[:, 0]
        if verb == 1:
            # BUG FIX: the original used a Python 2 print *statement*, which is
            # a syntax error under Python 3 (the rest of this file is py3).
            print(keyword + ' : ' + str(origin) + ' to ' + str(end_t)
                  + ' score=' + str(np.mean(score_all[:, i], axis=0)))
        # Wait until the current window has elapsed.
        time.sleep((end_t - origin).seconds)
    return score_all
def streamtofile_demo(limit=20):
    """Sample ``limit`` tweets from the public Streaming API into a file.

    :param limit: number of tweets to write before stopping (default 20).
    """
    creds = credsfromfile()
    streamer = Streamer(**creds)
    file_writer = TweetWriter(limit=limit, repeat=False)
    streamer.register(file_writer)
    streamer.statuses.sample()
def tweets_by_user_demo(user="******", count=200):
    """Search past tweets of a given user via the REST API.

    :param user: screen name of the account to query.
    :param count: maximum number of past tweets to retrieve (default 200).
    """
    creds = credsfromfile()
    rest_client = Query(**creds)
    rest_client.register(TweetWriter())
    rest_client.user_tweets(user, count)
def limit_by_time_demo(limit=20):
    """Sample from the Streaming API until ``limit`` tweets or the DATE cutoff.

    Relies on the module-level ``DATE`` constant for the writer's date limit.

    :param limit: maximum number of tweets to collect (default 20).
    """
    creds = credsfromfile()
    streamer = Streamer(**creds)
    dated_writer = TweetWriter(limit=limit, date_limit=DATE)
    streamer.register(dated_writer)
    streamer.sample()
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

oauth = credsfromfile()
n = 10  # number of tweets to fetch
username = '******'

# Query (REST API, historical data)
client = Query(**oauth)
client.register(TweetWriter())  # write fetched tweets to a file
client.user_tweets(username, n)  # fetch n tweets from the given user

# NOTE(review): the bare string below explains (in Chinese) that json2csv is
# used to extract the tweets' text field, and that input_file's abspath must
# be adjusted to match the path the Query writer above actually wrote to.
'''
使用 json2csv 存取 tweets 資料 (text欄位)
input_file 的 abspath 需參考上述 Query 寫入資料的路徑做修改
'''
# Hard-coded, machine-specific path — must be updated per run. TODO confirm
# the intended path against the TweetWriter output location.
input_file = twitter_samples.abspath('/Users/youngmihuang/twitter-files/tweets.20180726-155316.json')
with open(input_file) as fp:
    json2csv(fp, 'tweets_text.csv', ['text'])

# Read the CSV back and print each tweet's text.
data = pd.read_csv('tweets_text.csv')
for line in data.text:
    print('Trump tweets content: ')
    print(line)
# tokenization (next step)
def save_tweetes_file(self):
    """Sample 100 tweets from the Streaming API into the 'twitter_samples_files' subdir.

    Uses the module-level ``oauth`` credentials rather than instance state.
    """
    streamer = Streamer(**oauth)
    sample_writer = TweetWriter(limit=100, subdir='twitter_samples_files')
    streamer.register(sample_writer)
    streamer.statuses.sample()
def __init__(self, lim=100):
    """Initialize the writer with a tweet limit, storing output under "Tweets".

    :param lim: maximum number of tweets to write (default 100).
    """
    # Delegates directly to TweetWriter.__init__ with a fixed subdir;
    # presumably the enclosing class subclasses TweetWriter — TODO confirm.
    TweetWriter.__init__(self, limit=lim, subdir="Tweets")