-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitter_data.py
203 lines (183 loc) · 8.17 KB
/
twitter_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
class GetData(object):
def __init__(self):
import json as json
from termcolor import colored as c
self.color = c
self.json = json
self._json_data_available = False #check if saved file has data
self._json_file = 'all_tweets.json'
self.api_file = 'api_key.json'
def setup(self):
try:
with open(self.api_file, 'r') as api_file:
api_keys = self.json.load(api_file)
self.__alchemy_key = api_keys[0]['alchemy']['key']
self.__cons_key = api_keys[0]['twitter']['cons_key']
self.__cons_secret = api_keys[0]['twitter']['cons_secret']
self.__access_key = api_keys[0]['twitter']['access_key']
self.__access_secret = api_keys[0]['twitter']['access_secret']
except IOError:
print('The file containing the API keys cannot be found,'
'Please run: python api_setup.py to set that data')
def get_tweets(self, twitter_username, duration=7):
import datetime, tweepy, requests, tqdm as pbar
# import tweepy
# import tqdm as pbar
# import requests
auth = tweepy.OAuthHandler(self.__cons_key, self.__cons_secret)
auth.set_access_token(self.__access_key, self.__access_secret)
api = tweepy.API(auth)
all_tweets = []
try:
new_tweets = (api.user_timeline(screen_name=twitter_username,
count=20))
all_tweets.extend(new_tweets)
if len(all_tweets) > 0:
oldest_id = new_tweets[-1].id - 1
for i in pbar.tqdm(range(5)):
new_tweets = (api.user_timeline(screen_name=twitter_username,
count=20, max_id=oldest_id))
oldest_id = all_tweets[-1].id - 1
all_tweets.extend(new_tweets)
earliest_tweet_date = (all_tweets[0].created_at).date()
tweet_max_date = earliest_tweet_date + datetime.timedelta(days=duration)
status_list = []
for tweet in all_tweets:
if (tweet.created_at).date() <= tweet_max_date:
status_list.append(tweet._json)
with open(self._json_file, 'w') as json_data:
self.json.dump(status_list, json_data, indent=4)
else:
print(self.color('No tweets available for the entered duration', 'red'))
except tweepy.TweepError as e:
if e.reason.startswith('Failed to send request'):
print(self.color('Internet connection required, please connect and try again', 'red'))
elif e.reason == 'Not authorized.':
print('You are not authorized to view this person tweets!, (protected tweets)')
elif e.args[0][0]['code'] == 34:
print(self.color('Invalid twitter username, try again with a valid username', 'red'))
else:
print(self.color('Unknown Error', 'red'))
return ''
def check_json_file(self):
try:
try:
with open(self._json_file, 'r') as tweet_data:
data = self.json.load(tweet_data)
if len(data[0]) > 1:
self._json_data_available = True
else:
self._json_data_available = False
except ValueError:
self._json_data_available = False
except IOError:
self._json_data_available = False
def word_list(self):
import json
import re
from stop_words import get_stop_words
cachedStopWords = get_stop_words('english')
cachedStopWords.append(')')
cachedStopWords.append('-')
cachedStopWords.append(',')
tweet_list = []
with open(self._json_file, 'r') as tweet_data:
data = json.load(tweet_data)
tweet_list.extend(data)
unwanted_texts = re.compile('[@#-&()..."]')
words = []
for status in tweet_list:
for word in status['text'].split():
if unwanted_texts.match(word) or word.startswith('http')\
or word.startswith('http'):
continue
else:
for word_members in (word.replace(':',' ').replace('.',' '
).replace('!',' ').replace('?',' '
).replace('"', ' ').replace('-',' '
).replace('/',' ').split()
):
if (word_members not in cachedStopWords) and\
(word_members != 'RT'):
words.append(str(word_members))
return words
def word_frequency(self):
word_count = {}
#self.words = self.word_list()
for word in self.words:
try:
word = int(word)
try:
if word_count[word]:
word_count[word] += 1
except KeyError:
word_count[word] = 1
except ValueError:
try:
if word_count[word]:
word_count[word] += 1
except KeyError:
word_count[word] = 1
return word_count
def word_count_analysis(self):
from operator import itemgetter
from prettytable import PrettyTable
self.check_json_file()
if self._json_data_available:
word_count = self.word_frequency()
sort = word_count.items()
sorted_list =(sorted(sort, key=itemgetter(1)))
count_sum = sum(word_count.values())
count = -1
tweet_table = PrettyTable(['Word', 'Count', 'Percent'])
while count > -20:
word, value = sorted_list[count]
percent = (value/count_sum)*100
percent1 = ('{:.2f}'.format(percent))
tweet_table.add_row([word, value, percent1])
count += -1
print(self.color(tweet_table, 'cyan'))
else:
print('No tweets available yet, start by fetching for tweets first')
return ' '
def sentiment_analysis(self, all=True, sentiment=False, emotion=False):
from watson_developer_cloud import AlchemyLanguageV1
alchemyapi = AlchemyLanguageV1(api_key=self.__alchemy_key)
self.check_json_file()
if self._json_data_available:
words_list = ' '.join(self.word_list())
self.__tweets_sentiment = alchemyapi.sentiment(text=words_list)
self.__tweets_emotion = alchemyapi.emotion(text=words_list)
if all:
return (self.sentiment_table(), self.emotion_graph())
if sentiment:
return self.sentiment_table()
if emotion:
return self.emotion_graph()
else:
print(self.color('No tweets available yet, start by fetching for tweets first', 'red'))
return ' '
def emotion_graph(self):
from ascii_graph import Pyasciigraph
import ascii_graph.colors as c
from ascii_graph.colordata import vcolor
col_pattern = [c.Gre, c.Yel, c.Cya, c.Red, c.Pur]
data = [(str(emotion).capitalize(), float(value)*100) for emotion,
value in self.__tweets_emotion['docEmotions'].items()]
total_emotion_value = vcolor(data, col_pattern)
graph = Pyasciigraph()
for line in graph.graph('Emotions Graph', total_emotion_value):
print(' ', line)
return
def sentiment_table(self):
from prettytable import PrettyTable
sent_dict = self.__tweets_sentiment['docSentiment']
sentiment_data = PrettyTable(['Sentiment Type', 'Score', 'if_mixed'])
sentiment_data.add_row([sent_dict['type'], sent_dict['score'], sent_dict['mixed']])
print('\n')
print(self.color(sentiment_data, 'green'))
return
def main():
setup_getdata = GetData
setup_getdata.setup()
if __name__ == '__main__':main()