-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitter-donaldtrump.py
52 lines (42 loc) · 1.67 KB
/
twitter-donaldtrump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from nltk.twitter.common import json2csv
from nltk.twitter.common import json2csv_entities
from nltk.corpus import twitter_samples
from nltk.twitter import Query, Streamer, Twitter, TweetViewer, TweetWriter, credsfromfile
import pandas as pd
oauth = credsfromfile()
n = 10 # 設定拿取 tweets 資料則數
username = 'realDonaldTrump'
# Query
client = Query(**oauth) # 歷史資料
client.register(TweetWriter()) # 寫入
client.user_tweets(username, n) # 拿取 tweets 資料(n則)
'''
使用 json2csv 存取 tweets 資料 (text欄位)
input_file 的 abspath 需參考上述 Query 寫入資料的路徑做修改
'''
input_file = twitter_samples.abspath('/Users/youngmihuang/twitter-files/tweets.20180726-155316.json')
with open(input_file) as fp:
json2csv(fp, 'tweets_text.csv', ['text'])
# 讀取
data = pd.read_csv('tweets_text.csv')
for line in data.text:
print('Trump tweets content: ')
print(line)
# 斷詞
tokenized = twitter_samples.tokenized(input_file)
for tok in tokenized[:5]:
print('tokenized: ')
print(tok)
# tweets 資料處理
with open(input_file) as fp:
json2csv_entities(fp, 'tweets.20180726-155316.hashtags.csv',
['id', 'text'], 'hashtags', ['text'])
with open(input_file) as fp:
json2csv_entities(fp, 'tweets.20180726-155316.user_mentions.csv',
['id', 'text'], 'user_mentions', ['id', 'screen_name'])
with open(input_file) as fp:
json2csv_entities(fp, 'tweets.20180726-155316.media.csv',
['id'], 'media', ['media_url', 'url'])
with open(input_file) as fp:
json2csv_entities(fp, 'tweets.20180726-155316.urls.csv',
['id'], 'urls', ['url', 'expanded_url'])