def tweets_by_user_demo(user="******", count=200):
    """Fetch a user's past tweets through the Twitter REST API.

    Registers a ``TweetWriter`` so the fetched tweets are written to
    disk, then requests up to ``count`` tweets from ``user``'s timeline.

    :param user: screen name to query (placeholder by default).
    :param count: maximum number of tweets to retrieve.
    """
    creds = credsfromfile()
    rest_client = Query(**creds)
    rest_client.register(TweetWriter())
    rest_client.user_tweets(user, count)
# NOTE(review): this re-definition is identical to an earlier
# ``tweets_by_user_demo`` in this file and shadows it at import time —
# one of the two copies should probably be removed.
def tweets_by_user_demo(user='******', count=200):
    """Search a user's past tweets via the REST API and write them out.

    :param user: screen name to query (placeholder by default).
    :param count: maximum number of tweets to retrieve.
    """
    auth = credsfromfile()
    query_client = Query(**auth)
    query_client.register(TweetWriter())
    query_client.user_tweets(user, count)
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd

oauth = credsfromfile()

n = 10  # number of tweets to fetch
username = '******'

# Query the REST API for the user's historical tweets; the registered
# TweetWriter persists the n fetched tweets to a JSON file on disk.
client = Query(**oauth)
client.register(TweetWriter())
client.user_tweets(username, n)

# Convert the stored tweets to CSV with json2csv, keeping only the
# 'text' field.
# NOTE(review): the path below is machine-specific — change it to the
# abspath that the TweetWriter step above reported when it saved the
# JSON file.
input_file = twitter_samples.abspath('/Users/youngmihuang/twitter-files/tweets.20180726-155316.json')
with open(input_file) as fp:
    json2csv(fp, 'tweets_text.csv', ['text'])

# Read the CSV back and print every tweet's text.
df = pd.read_csv('tweets_text.csv')
for tweet_text in df.text:
    print('Trump tweets content: ')
    print(tweet_text)

# Word segmentation (continues below)
from nltk.twitter import Query, credsfromfile, TweetViewer from nltk.stem import WordNetLemmatizer from sklearn.feature_extraction.text import CountVectorizer import sys if (len(sys.argv)<4): print ('Usage:', sys.argv[0], ' twitter_username max_tweets_to_search max_top_words_to_print lemmatize(optional)' ) quit() #capture the output of tweetViewer to file for processing sys.stdout = open('tweets.txt', 'w') oauth = credsfromfile() client = Query(**oauth) client.register(TweetViewer(limit=sys.argv[2])) client.user_tweets(sys.argv[1], sys.argv[2]) #give back control to stdout sys.stdout = sys.__stdout__ lemmatizer = WordNetLemmatizer() if (len(sys.argv)>4 and sys.argv[4].lower()=='lemmatize'): lemmatize=True else: lemmatize=False def text_cleaner(documents): text_cleaned = [] for document in documents: