# run.py -- forked from ZacSweers/fb_group_analytics
from Queue import Queue # Threadsafe queue for threads to use
from collections import Counter # To count stuff for us
import datetime # Because datetime printing is hard
import time # Should be obvious
import sys # Get system info
import threading # Should be obvious
import json # Also obvious
# FB API wrapper ("pip install facepy")
import facepy
import props
from utils import Color, notify_mac, log
__author__ = 'Henri Sweers'

# Earliest-seen unix timestamp per user id, filled in by the worker threads
appeared = {}
# Junk method used for testing
def test():
    """Smoke-test hook: just emits a single log line."""
    log("Test")
# Export method, recieves a jsonObj of style {"label": dictionary}
def exportData(jsonDict):
# Do stuff
print "Exported"
# print jsonDict
# Thread class. Each thread gets all the data from a certain date range
class RequestThread(threading.Thread):
    """Worker that fetches one date-range's posts and comments over FQL
    and tallies them into Counters.

    Results are delivered by putting a dict of Counters on the shared
    (threadsafe) queue:
        't'  -> total likes received per user id
        'p'  -> like count per post id
        'c'  -> like count per comment id
        'tp' -> posts per day (keyed by stringified unix midnight)
        'tc' -> comments per day
        'cc' -> 1 + comment count per post id ("most discussed")

    Also records each user's earliest appearance in the module-level
    `appeared` dict. NOTE(review): `appeared` is shared across threads
    with no lock; tolerable under CPython's GIL for this script, but not
    strictly threadsafe.
    """

    def __init__(self, queue, apikey, query, curr_time, num_weeks):
        # queue: threadsafe Queue results are pushed onto
        # apikey: FB access token
        # query: FQL stream query with this thread's date range baked in
        # curr_time: window end (unix seconds)
        # num_weeks: window length in SECONDS (despite the name)
        threading.Thread.__init__(self)
        self.queue = queue
        # Graph object for our calls, authenticated with the token
        self.graph = facepy.GraphAPI(apikey)
        self.input_query = query
        # Counters. t-total, p-posts, c-comments (see class docstring)
        self.tcounter = Counter()
        self.pcounter = Counter()
        self.ccounter = Counter()
        self.tpcounter = Counter()
        self.tccounter = Counter()
        self.cccounter = Counter()
        # Human-readable "YYYY-MM-DD-YYYY-MM-DD" window, for logging
        self.time_range = datetime.datetime.fromtimestamp(
            curr_time - num_weeks).strftime('%Y-%m-%d') + "-" + \
            datetime.datetime.fromtimestamp(curr_time).strftime(
                '%Y-%m-%d')

    # Main runner
    def run(self):
        log("\t(" + self.time_range + ') - Getting posts...')
        # Get group posts
        try:
            group_posts = self.graph.fql(query=self.input_query)
        except Exception as e:
            # 99% of the time this is just an expired API access token.
            # NOTE: sys.exit() in a worker only raises SystemExit in THIS
            # thread; the main thread keeps going minus this range's data.
            log("Error: " + str(e), Color.RED)
            sys.exit()
        log("\t(" + self.time_range + ") - " +
            str(len(group_posts["data"])) + " posts")
        # BUG FIX: was `len(group_posts) != 0`, which tested the response
        # dict (always non-empty) instead of the list of posts, so the
        # "No posts" branch could never run.
        if group_posts["data"]:
            # Invariant prefix for the per-post comment query (hoisted
            # out of the loop; it never changes)
            comments_query = \
                "SELECT fromid, likes, id, time FROM comment WHERE post_id="
            for post in group_posts["data"]:
                actor = post['actor_id']
                created = int(post['created_time'])
                # Remember the EARLIEST time we have seen this actor
                if actor in appeared:
                    appeared[actor] = min(appeared[actor], created)
                else:
                    appeared[actor] = created
                # Add post's like count to that user in our total counter
                self.tcounter[actor] += post[
                    'like_info']['like_count']
                # Add to top liked posts counter
                self.pcounter[post['post_id']] = post['like_info'][
                    'like_count']
                # Midnight-of-post-day unix timestamp, used as the day key
                day_timestamp = datetime.datetime.fromtimestamp(created)
                day_timestamp = day_timestamp.replace(hour=0, minute=0,
                                                      second=0, microsecond=0)
                day_timestamp = (
                    day_timestamp - datetime.datetime(1970, 1, 1)).total_seconds()
                # Add to per-day post count
                self.tpcounter[str(day_timestamp)] += 1
                # Seed the "most discussed" counter (post itself counts 1)
                self.cccounter[post['post_id']] += 1
                # Fetch up to 500 comments for this post
                comments = self.graph.fql(
                    comments_query + "\"" + str(post['post_id']) +
                    "\" LIMIT 500")
                if len(comments["data"]) != 0:
                    log("\t\t(" + self.time_range + ") - " + str(
                        len(comments["data"])) + " comments")
                    log("\t\t(" + self.time_range + ') - Getting comments...')
                    for c in comments["data"]:
                        # Credit comment likes to the commenting user
                        self.tcounter[c['fromid']] += c['likes']
                        # Track like count per individual comment
                        self.ccounter[c['id']] = c['likes']
                        # Per-day comment count (keyed by the POST's day)
                        self.tccounter[str(day_timestamp)] += 1
                        # Each comment bumps the post's discussion score
                        self.cccounter[post['post_id']] += 1
                        # Remember the commenter's earliest appearance
                        if c['fromid'] in appeared:
                            appeared[c['fromid']] = min(
                                appeared[c['fromid']], int(c['time']))
                        else:
                            appeared[c['fromid']] = int(c['time'])
                else:
                    log("\t\tNo comments from this post")
        else:
            log("\t\tNo posts from this time frame")
        # Hand all six counters back to the main thread
        self.queue.put({'t': self.tcounter, 'p': self.pcounter, 'c':
                        self.ccounter, 'tp': self.tpcounter,
                        'tc': self.tccounter, 'cc': self.cccounter})
# Method for counting various total likes in a group
def count_group_likes():
fb_api_access_token = props.token
fb_app_id = props.app_id
fb_secret_key = props.secret_key
# Counter object to do the counting for us
total_likes_counter = Counter()
top_liked_posts_counter = Counter()
top_liked_comments_counter = Counter()
total_posts_counter = Counter()
total_comments_counter = Counter()
most_discussed_counter = Counter()
group_id = props.group_id
num_of_items_to_return = props.num_of_items
# Put the number of weeks you want it to increment by each time
# smaller is better, but too small and you could hit your rate limit
# ... which is 600 calls per 600 seconds. Maybe apps get more
num_weeks = int("2")
# Convert to unix time
num_weeks_unix = num_weeks * 604800
start_date = props.start_date
datetime_start_date = datetime.datetime.fromtimestamp(start_date)
# Query strings for FQL
posts_query = \
"SELECT post_id, like_info, actor_id, created_time FROM stream" + \
" WHERE source_id=" + group_id + " AND created_time<"
person_query = "SELECT first_name, last_name FROM user WHERE uid="
# Authorize our API wrapper
graph = facepy.GraphAPI(fb_api_access_token)
# Code to programatically extend key
if extend_key:
access_token, expires_at = facepy.get_extended_access_token(
fb_api_access_token,
fb_app_id,
fb_secret_key
)
# This will print out new extended token and new expiration date
# Copy them and replace your token above with this one
print 'New token: ' + access_token
print 'New expiration date: ' + expires_at
log('Getting group posts', Color.BLUE)
# Send end time to current time and work backward
end_time = int(time.time())
# Or manually set end time
# end_time = 1392422400
log('Current date is: ' + datetime.datetime.fromtimestamp(
end_time).strftime('%Y-%m-%d'))
log('Incrementing by ' + str(num_weeks) + ' weeks at a time')
# List of thread objects
threads = []
# Threadsafe queue for the threads to dump their data in
final_queue = Queue()
log("Initializing threads...", Color.BLUE)
# While loop that creates the threads
# Instantiates each thread with calculated time, keeps decrementing to
# start
while end_time > start_date:
# New query
new_query = posts_query + str(
end_time) + " AND created_time>" + \
str(end_time - num_weeks_unix) + " LIMIT 600"
# Thread creation
t = RequestThread(final_queue, fb_api_access_token, new_query,
end_time, num_weeks_unix)
# Add it to our list
threads.append(t)
# Decrement the time
end_time -= num_weeks_unix
# Start the thread
t.start()
log("Joining threads...", Color.BLUE)
# Wait for all the threads to finish before counting everything up
for t in threads:
t.join()
log("Done, merging data...", Color.BLUE)
# Count up all the data by merging all the counters from each thread result
for stuff in list(final_queue.queue):
total_likes_counter += stuff['t']
top_liked_posts_counter += stuff['p']
top_liked_comments_counter += stuff['c']
total_posts_counter += stuff['tp']
total_comments_counter += stuff['tc']
most_discussed_counter += stuff['cc']
most_active_day_counter = total_posts_counter + total_comments_counter
# Returns key-value list of top <num_of_items_to_return> items
most_common_people = total_likes_counter.most_common(
num_of_items_to_return)
top_posts = top_liked_posts_counter.most_common(num_of_items_to_return)
top_comments = top_liked_comments_counter.most_common(
num_of_items_to_return)
total_posts = total_posts_counter.most_common(num_of_items_to_return)
total_comments = total_comments_counter.most_common(num_of_items_to_return)
most_active_days = most_active_day_counter.most_common(
num_of_items_to_return)
most_discussed = most_discussed_counter.most_common(num_of_items_to_return)
top_people_stats = []
# Iterate over top people and retrieve names from their ID's
# Use enumerate to keep track of indices for rank numbers
log('\nPeople Stats', Color.BOLD)
log("* = Weighted average calc'd from user's first post date")
for i, x in enumerate(most_common_people):
person = graph.fql(person_query + str(x[0]))["data"][0]
now = datetime.datetime.now()
join_date = datetime.datetime.fromtimestamp(appeared[x[0]])
diff1 = now - datetime_start_date
diff2 = now - join_date
avg = x[1] / (diff1.total_seconds() / 60 / 60 / 24 / 7)
weighted_avg = x[1] / (diff2.total_seconds() / 60 / 60 / 24 / 7)
top_people_stats.append({
"name": person['first_name'] + " " + person['last_name'],
"likes": x[1],
"avg": avg,
"augmented_avg": weighted_avg,
"first": int(
(join_date - datetime.datetime(1970, 1, 1)).total_seconds())
})
print '#' + str(i + 1) + '. ' + person['first_name'] + " " + person[
'last_name']
print '-- Likes: ' + str(x[1])
print '-- Weekly average: ' + str(avg)
print '-- Weekly average*: ' + str(weighted_avg)
print '-- First post: ' + join_date.strftime('%Y-%m-%d')
# Iterate over top posts and get info
log('\nTop posts!', Color.BOLD)
for x in top_posts:
post = graph.get(str(x[0]))
s = str(x[1]) + " - " + post['from']['name'] + " - " + post['type']
print s
if 'message' in post:
m = str(post['message'].encode('ascii', 'ignore')).replace('\n',
' ')
if len(m) > 70:
print '-- ' + m[0:70] + "..."
else:
print '-- ' + m
print '-- http://www.facebook.com/' + post['id']
# Iterate over top comments and get info
log('\nTop comments!', Color.BOLD)
for x in top_comments:
comment = graph.get(str(x[0]))
s = str(x[1]) + " - " + comment['from']['name']
print s
if 'message' in comment:
c = str(comment['message'].encode('ascii', 'ignore')).replace('\n',
' ')
if len(c) > 70:
print '-- ' + c[0:70] + "..."
else:
print '-- ' + c
print '-- http://www.facebook.com/' + comment['id']
# Iterate over total posts/comments and calculate info
log('\nMost active days (by number of posts and comments)', Color.BOLD)
for x in most_active_days:
d = datetime.datetime.fromtimestamp(float(x[0])).strftime('%m/%d/%Y')
print str(x[1]) + " - " + d
# Iterate over total posts and calculate info
log('\nMost active days (by number of posts)', Color.BOLD)
for x in total_posts:
d = datetime.datetime.fromtimestamp(float(x[0])).strftime('%m/%d/%Y')
print str(x[1]) + " - " + d
# Iterate over total comments and calculate info
log('\nMost active days (by number of comments)', Color.BOLD)
for x in total_comments:
d = datetime.datetime.fromtimestamp(float(x[0])).strftime('%m/%d/%Y')
print str(x[1]) + " - " + d
# Iterate over top posts and get info
log('\nMost discussed', Color.BOLD)
for x in most_discussed:
post = graph.get(str(x[0]))
s = str(x[1]) + " - " + post['from']['name'] + " - " + post['type']
print s
if 'message' in post:
m = str(post['message'].encode('ascii', 'ignore')).replace('\n',
' ')
if len(m) > 70:
print '-- ' + m[0:70] + "..."
else:
print '-- ' + m
print '-- http://www.facebook.com/' + post['id']
log('\nExporting...', Color.BLUE)
dataDict = json.dumps({"top_people_stats": top_people_stats,
"top_liked_posts_counter": top_liked_posts_counter,
"top_liked_comments_counter": top_liked_comments_counter,
"total_posts_counter": total_posts_counter,
"total_comments_counter": total_comments_counter,
"most_active_day_counter": most_active_day_counter,
"most_common_people": most_common_people,
"top_posts": top_posts,
"top_comments": top_comments,
"total_posts": total_posts,
"total_comments": total_comments,
"most_active_days": most_active_days})
exportData(dataDict)
# CLI entry point. Guarded so that importing this module no longer kicks
# off a full crawl (the original ran unconditionally at import time).
# Assignments here are still module-level, so `extend_key` remains the
# global read by count_group_likes.
if __name__ == '__main__':
    args = sys.argv
    # boolean for if we want to extend token access
    extend_key = False
    if len(args) > 1:
        if "--extend" in args:  # Pass in flag
            extend_key = True
        if "test" in args:
            test()
            sys.exit()
    else:
        log('No args specified')

    count_group_likes()
    notify_mac()