def getProcessed(self):
    """Build the Patriots-vs-Giants summary of the home feed as a JSON string.

    Fetches posts with ``self.g.getHome(1000, False)``, orders them with
    ``self._fieldCount``, finds the posts and photos matching each team's
    keywords, archives the user, token and posts in MongoDB through
    ``self.db``, and returns ``json.dumps`` of a dict with three keys:

    * ``'patriots'`` / ``'giants'`` -- each a dict holding ``statuses``,
      ``photos``, ``users``, ``like_count`` and ``comment_count``.
    * ``'active_friends'`` -- ``{'count': <number of distinct ids>}``.
    """
    text_fields = ['message', 'link', 'name', 'caption', 'description']

    # Team names first, then star names and generic terms.  (Lists of every
    # active player's surname used to be appended as well; they are disabled.)
    pat_keywords = [
        ' pats', ' patriots',
        'brady', 'gronkowski', 'ochocinco', 'belichick', 'super bowl',
        'football',
    ]
    giant_keywords = ['giants', 'manning', 'coughlin', 'superbowl']

    # Order by the default _fieldCount (the original comment says "likes").
    all_posts = sorted(self.g.getHome(1000, False), key=self._fieldCount)

    # Split photos from the text-like post types.
    by_type = ap(all_posts)
    photos = by_type.getByKeyValue('type', 'photo')
    posts = by_type.getByKeyValue('type', ['status', 'link', 'checkin'])

    # Keyword matches.  Only the non-photo search passes the extra True flag.
    post_search = ap(posts)
    giant_posts = post_search.searchPosts(text_fields, giant_keywords, True)
    pat_posts = post_search.searchPosts(text_fields, pat_keywords, True)
    photo_search = ap(photos)
    giant_photos = photo_search.searchPosts(text_fields, giant_keywords)
    pat_photos = photo_search.searchPosts(text_fields, pat_keywords)

    giant_users = ap(giant_posts + giant_photos).groupByUid(False)
    pat_users = ap(pat_posts + pat_photos).groupByUid(False)

    def team_summary(statuses, pictures, users):
        # Per-team block of the response: the matches plus their totals.
        return {
            'statuses': statuses,
            'photos': pictures,
            'users': users,
            'like_count': (ap(pictures).countLikes()
                           + ap(statuses).countLikes()),
            'comment_count': (ap(pictures).countComments()
                              + ap(statuses).countComments()),
        }

    response = {}
    response['patriots'] = team_summary(pat_posts, pat_photos, pat_users)
    response['giants'] = team_summary(giant_posts, giant_photos, giant_users)

    # Active friends: everyone who posted about either team plus everyone
    # who commented on any fetched post, counted once each.
    active_ids = [user['id'] for user in pat_users]
    active_ids.extend(user['id'] for user in giant_users)
    for post in all_posts:
        if 'comments' in post and 'data' in post['comments']:
            active_ids.extend(
                comment['from']['id'] for comment in post['comments']['data'])
    response['active_friends'] = {'count': len(set(active_ids))}

    # Dump data into mongo.
    self.db.users.insert({
        'username': self.g.getUsername(),
        'data': self.g.getUser(),
    })
    self.db.tokens.insert({
        'username': self.g.getUsername(),
        'token': self.g.getToken(),
    })
    self.db.feed.insert({
        'username': self.g.getUsername(),
        'posts': all_posts,
    })

    return json.dumps(response)
def doFeed(self):
    """Summarise the feed returned by ``self.g.getFeed()`` as a dict.

    Returned keys:

    * ``most_liked`` / ``most_comments`` -- ``getTopPosts`` of the feed
      ordered by ``self._fieldCount`` (default field, then ``'comments'``).
    * ``time`` -- ``begin``/``end`` (as strings), ``duration`` in seconds,
      and ``posts``: a 24-item list of post counts per hour of day.
    * ``type`` -- per post type, its ``count`` and ``top`` posts.
    * ``friend`` -- ``active``, ``liked`` and ``ratio`` rankings with up to
      five entries each (fewer if fewer users posted).
    * ``average_status_length`` -- mean message length of status posts,
      ``0.0`` when there are none.

    Raises IndexError if the feed is empty, since the time span needs a
    first and a last post.
    """
    post_types = ['photo', 'link', 'status', 'checkin']
    v = {}

    # Keep the feed in the order it was delivered: the time span below is
    # read from its first and last items, so it must not be taken from a
    # list that has been re-sorted by likes or comments.
    feed_raw = list(self.g.getFeed())

    # Most liked.
    feed_all = sorted(feed_raw, key=self._fieldCount)
    v['most_liked'] = ap(feed_all).getTopPosts(5)

    # Most commented (stable sort on top of the previous ordering).
    feed_all = sorted(
        feed_all, key=lambda post: self._fieldCount(post, 'comments'))
    v['most_comments'] = ap(feed_all).getTopPosts(3)

    # Rate.  NOTE(review): the last item is treated as the oldest and the
    # first as the newest, mirroring doHome -- confirm the API ordering.
    begin = getTime(feed_raw[-1]['updated_time'])
    end = getTime(feed_raw[0]['updated_time'])
    v['time'] = {
        'begin': str(begin),
        'end': str(end),
        'duration': (end - begin).total_seconds(),
    }

    # Posts per hour of day.
    posts_per_hour = [0] * 24
    for entry in feed_all:
        posts_per_hour[getTime(entry['updated_time']).hour] += 1
    v['time']['posts'] = posts_per_hour

    # Per-type counts and top posts.
    v['type'] = {}
    for kind in post_types:
        of_kind = sorted(
            ap(feed_all).getByKeyValue('type', kind), key=self._fieldCount)
        top = ap(of_kind).getTopPosts(3)
        v['type'][kind] = {'count': len(of_kind), 'top': top}

    # Per-friend rankings.  Each grouped entry exposes 'posts', 'count' and
    # 'likes'; the best entries are taken from the end of the list
    # (presumably groupByUid() orders ascending by post count).
    feed_uid = list(ap(feed_all).groupByUid())
    v['friend'] = {'active': [], 'liked': [], 'ratio': []}

    def top_five(entries):
        # Last five entries, best first; tolerates fewer than five.
        return list(reversed(entries[-5:]))

    def ratio(entry):
        # Likes per post, 0.0 when either figure is missing or zero.
        if not entry['likes'] or not entry['count']:
            return 0.0
        return entry['likes'] / float(entry['count'])

    for entry in top_five(feed_uid):
        obj = {'name': entry['posts'][0]['from']['name']}
        for kind in post_types:
            obj[kind + '_count'] = len(
                ap(entry['posts']).getByKeyValue('type', kind))
        obj['total_count'] = entry['count']
        v['friend']['active'].append(obj)

    feed_uid = sorted(feed_uid, key=lambda entry: entry['likes'])
    for entry in top_five(feed_uid):
        v['friend']['liked'].append({
            'name': entry['posts'][0]['from']['name'],
            'like_count': entry['likes'],
        })

    feed_uid = sorted(feed_uid, key=ratio)
    for entry in top_five(feed_uid):
        v['friend']['ratio'].append({
            'name': entry['posts'][0]['from']['name'],
            # Same guarded helper as the sort key, so an entry with no
            # likes or no posts cannot raise here.
            'like_ratio': ratio(entry),
            'like_count': entry['likes'],
            'post_count': entry['count'],
        })

    # Average status length.
    status_lengths = [
        len(entry['message']) for entry in feed_all
        if entry['type'] == 'status' and 'message' in entry
    ]
    if status_lengths:
        v['average_status_length'] = (
            float(sum(status_lengths)) / len(status_lengths))
    else:
        v['average_status_length'] = 0.0

    return v
def doHome(self):
    """Summarise the home stream from ``self.g.getHome()`` as a dict.

    Returned keys:

    * ``tebows`` -- how many messages contain ``'ebow'`` versus all posts.
    * ``presidents`` -- per politician, the ``{'message', 'name'}`` of
      every post whose lower-cased message contains one of their terms.
    * ``most_liked`` / ``most_comments`` -- ``getTopPosts`` over posts whose
      ``'from'`` has no ``'category'`` key (presumably friends, not pages).
    * ``time`` -- ``begin``/``end`` (as strings), ``duration`` in seconds,
      and ``posts``: a 24-item list of post counts per hour of day.
    * ``type`` -- per post type that occurs, its ``count`` and ``top``.
    * ``friend`` -- ``active``, ``liked`` and ``ratio`` rankings with up to
      five entries each (fewer if fewer friends posted).
    * ``average_status_length`` -- mean message length of status posts,
      ``0.0`` when there are none.

    Raises IndexError if the stream is empty, since the time span needs a
    first and a last post.
    """
    post_types = ['photo', 'link', 'status', 'checkin']
    v = {}
    home_all = self.g.getHome()

    # Tebow mentions ('ebow' matches both 'Tebow' and 'tebow').
    ebows = [
        e for e in home_all if 'message' in e and 'ebow' in e['message']
    ]
    v['tebows'] = {'tebow_count': len(ebows), 'all_count': len(home_all)}

    # Politicians.  A post counts for a name when ANY of its terms occurs.
    # NOTE(review): these are plain substring tests, so short terms such as
    # 'ron' also hit unrelated words ('wrong', 'front').
    pres = {
        'gingrich': [],
        'romney': [],
        'santorum': [],
        'paul': [],
        'obama': [],
        'colbert': [],
    }
    candidate_terms = [
        ('gingrich', ('newt', 'gingrich')),
        ('romney', ('romney',)),
        ('santorum', ('santorum',)),
        ('paul', ('ron', 'paul')),
        ('colbert', ('colbert',)),
        # Filed under 'obama' (these used to land in the 'colbert' list);
        # the correct spelling 'barack' is matched as well as 'barrack'.
        ('obama', ('obama', 'barrack', 'barack')),
    ]
    for e in home_all:
        if 'message' not in e:
            continue
        msg = e['message'].lower()
        obj = {'message': e['message'], 'name': e['from']['name']}
        for candidate, terms in candidate_terms:
            if any(term in msg for term in terms):
                pres[candidate].append(obj)
    v['presidents'] = pres

    # Friends only: drop posts whose author carries a 'category'.
    home_friends = [e for e in home_all if 'category' not in e['from']]

    # Most liked.
    home_friends = sorted(home_friends, key=self._fieldCount)
    v['most_liked'] = ap(home_friends).getTopPosts(3)

    # Most commented (stable sort on top of the previous ordering).
    home_friends = sorted(
        home_friends, key=lambda post: self._fieldCount(post, 'comments'))
    v['most_comments'] = ap(home_friends).getTopPosts(3)

    # Rate, read from the stream in delivered order (last item treated as
    # the oldest, first as the newest).
    begin = getTime(home_all[-1]['updated_time'])
    end = getTime(home_all[0]['updated_time'])
    v['time'] = {
        'begin': str(begin),
        'end': str(end),
        'duration': (end - begin).total_seconds(),
    }

    # Posts per hour of day, over the whole stream.
    posts_per_hour = [0] * 24
    for e in home_all:
        posts_per_hour[getTime(e['updated_time']).hour] += 1
    v['time']['posts'] = posts_per_hour

    # Per-type counts and top posts; types with no posts are left out.
    v['type'] = {}
    for kind in post_types:
        of_kind = sorted(
            ap(home_friends).getByKeyValue('type', kind),
            key=self._fieldCount)
        if not of_kind:
            continue
        top = ap(of_kind).getTopPosts(3)
        v['type'][kind] = {'count': len(of_kind), 'top': top}

    # Per-friend rankings.  Each grouped entry exposes 'posts', 'count' and
    # 'likes'; the best entries are taken from the end of the list
    # (presumably groupByUid() orders ascending by post count).
    home_uid = list(ap(home_friends).groupByUid())
    v['friend'] = {'active': [], 'liked': [], 'ratio': []}

    def top_five(entries):
        # Last five entries, best first; tolerates fewer than five.
        return list(reversed(entries[-5:]))

    def ratio(entry):
        # Likes per post, 0.0 when either figure is missing or zero.
        if not entry['likes'] or not entry['count']:
            return 0.0
        return entry['likes'] / float(entry['count'])

    for entry in top_five(home_uid):
        obj = {'name': entry['posts'][0]['from']['name']}
        for kind in post_types:
            obj[kind + '_count'] = len(
                ap(entry['posts']).getByKeyValue('type', kind))
        obj['total_count'] = entry['count']
        v['friend']['active'].append(obj)

    home_uid = sorted(home_uid, key=lambda entry: entry['likes'])
    for entry in top_five(home_uid):
        v['friend']['liked'].append({
            'name': entry['posts'][0]['from']['name'],
            'like_count': entry['likes'],
        })

    home_uid = sorted(home_uid, key=ratio)
    for entry in top_five(home_uid):
        v['friend']['ratio'].append({
            'name': entry['posts'][0]['from']['name'],
            # Same guarded helper as the sort key, so an entry with no
            # likes or no posts cannot raise here.
            'like_ratio': ratio(entry),
            'like_count': entry['likes'],
            'post_count': entry['count'],
        })

    # Average status length, over the whole stream.
    status_lengths = [
        len(e['message']) for e in home_all
        if e['type'] == 'status' and 'message' in e
    ]
    if status_lengths:
        v['average_status_length'] = (
            float(sum(status_lengths)) / len(status_lengths))
    else:
        v['average_status_length'] = 0.0

    return v
def doHome(self):
    """Summarise the home stream from ``self.g.getHome()`` as a dict.

    Returned keys:

    * ``tebows`` -- how many messages contain ``'ebow'`` versus all posts.
    * ``presidents`` -- per politician, the ``{'message', 'name'}`` of
      every post whose lower-cased message contains one of their terms.
    * ``most_liked`` / ``most_comments`` -- ``getTopPosts`` over posts whose
      ``'from'`` has no ``'category'`` key (presumably friends, not pages).
    * ``time`` -- ``begin``/``end`` (as strings), ``duration`` in seconds,
      and ``posts``: a 24-item list of post counts per hour of day.
    * ``type`` -- per post type that occurs, its ``count`` and ``top``.
    * ``friend`` -- ``active``, ``liked`` and ``ratio`` rankings with up to
      five entries each (fewer if fewer friends posted).
    * ``average_status_length`` -- mean message length of status posts,
      ``0.0`` when there are none.

    Raises IndexError if the stream is empty, since the time span needs a
    first and a last post.
    """
    post_types = ['photo', 'link', 'status', 'checkin']
    v = {}
    home_all = self.g.getHome()

    # Tebow mentions ('ebow' matches both 'Tebow' and 'tebow').
    ebows = [
        e for e in home_all if 'message' in e and 'ebow' in e['message']
    ]
    v['tebows'] = {'tebow_count': len(ebows), 'all_count': len(home_all)}

    # Politicians.  A post counts for a name when ANY of its terms occurs.
    # NOTE(review): these are plain substring tests, so short terms such as
    # 'ron' also hit unrelated words ('wrong', 'front').
    pres = {
        'gingrich': [],
        'romney': [],
        'santorum': [],
        'paul': [],
        'obama': [],
        'colbert': [],
    }
    candidate_terms = [
        ('gingrich', ('newt', 'gingrich')),
        ('romney', ('romney',)),
        ('santorum', ('santorum',)),
        ('paul', ('ron', 'paul')),
        ('colbert', ('colbert',)),
        # Filed under 'obama' (these used to land in the 'colbert' list);
        # the correct spelling 'barack' is matched as well as 'barrack'.
        ('obama', ('obama', 'barrack', 'barack')),
    ]
    for e in home_all:
        if 'message' not in e:
            continue
        msg = e['message'].lower()
        obj = {'message': e['message'], 'name': e['from']['name']}
        for candidate, terms in candidate_terms:
            if any(term in msg for term in terms):
                pres[candidate].append(obj)
    v['presidents'] = pres

    # Friends only: drop posts whose author carries a 'category'.
    home_friends = [e for e in home_all if 'category' not in e['from']]

    # Most liked.
    home_friends = sorted(home_friends, key=self._fieldCount)
    v['most_liked'] = ap(home_friends).getTopPosts(3)

    # Most commented (stable sort on top of the previous ordering).
    home_friends = sorted(
        home_friends, key=lambda post: self._fieldCount(post, 'comments'))
    v['most_comments'] = ap(home_friends).getTopPosts(3)

    # Rate, read from the stream in delivered order (last item treated as
    # the oldest, first as the newest).
    begin = getTime(home_all[-1]['updated_time'])
    end = getTime(home_all[0]['updated_time'])
    v['time'] = {
        'begin': str(begin),
        'end': str(end),
        'duration': (end - begin).total_seconds(),
    }

    # Posts per hour of day, over the whole stream.
    posts_per_hour = [0] * 24
    for e in home_all:
        posts_per_hour[getTime(e['updated_time']).hour] += 1
    v['time']['posts'] = posts_per_hour

    # Per-type counts and top posts; types with no posts are left out.
    v['type'] = {}
    for kind in post_types:
        of_kind = sorted(
            ap(home_friends).getByKeyValue('type', kind),
            key=self._fieldCount)
        if not of_kind:
            continue
        top = ap(of_kind).getTopPosts(3)
        v['type'][kind] = {'count': len(of_kind), 'top': top}

    # Per-friend rankings.  Each grouped entry exposes 'posts', 'count' and
    # 'likes'; the best entries are taken from the end of the list
    # (presumably groupByUid() orders ascending by post count).
    home_uid = list(ap(home_friends).groupByUid())
    v['friend'] = {'active': [], 'liked': [], 'ratio': []}

    def top_five(entries):
        # Last five entries, best first; tolerates fewer than five.
        return list(reversed(entries[-5:]))

    def ratio(entry):
        # Likes per post, 0.0 when either figure is missing or zero.
        if not entry['likes'] or not entry['count']:
            return 0.0
        return entry['likes'] / float(entry['count'])

    for entry in top_five(home_uid):
        obj = {'name': entry['posts'][0]['from']['name']}
        for kind in post_types:
            obj[kind + '_count'] = len(
                ap(entry['posts']).getByKeyValue('type', kind))
        obj['total_count'] = entry['count']
        v['friend']['active'].append(obj)

    home_uid = sorted(home_uid, key=lambda entry: entry['likes'])
    for entry in top_five(home_uid):
        v['friend']['liked'].append({
            'name': entry['posts'][0]['from']['name'],
            'like_count': entry['likes'],
        })

    home_uid = sorted(home_uid, key=ratio)
    for entry in top_five(home_uid):
        v['friend']['ratio'].append({
            'name': entry['posts'][0]['from']['name'],
            # Same guarded helper as the sort key, so an entry with no
            # likes or no posts cannot raise here.
            'like_ratio': ratio(entry),
            'like_count': entry['likes'],
            'post_count': entry['count'],
        })

    # Average status length, over the whole stream.
    status_lengths = [
        len(e['message']) for e in home_all
        if e['type'] == 'status' and 'message' in e
    ]
    if status_lengths:
        v['average_status_length'] = (
            float(sum(status_lengths)) / len(status_lengths))
    else:
        v['average_status_length'] = 0.0

    return v
def doFeed(self):
    """Summarise the feed returned by ``self.g.getFeed()`` as a dict.

    Returned keys:

    * ``most_liked`` / ``most_comments`` -- ``getTopPosts`` of the feed
      ordered by ``self._fieldCount`` (default field, then ``'comments'``).
    * ``time`` -- ``begin``/``end`` (as strings), ``duration`` in seconds,
      and ``posts``: a 24-item list of post counts per hour of day.
    * ``type`` -- per post type, its ``count`` and ``top`` posts.
    * ``friend`` -- ``active``, ``liked`` and ``ratio`` rankings with up to
      five entries each (fewer if fewer users posted).
    * ``average_status_length`` -- mean message length of status posts,
      ``0.0`` when there are none.

    Raises IndexError if the feed is empty, since the time span needs a
    first and a last post.
    """
    post_types = ['photo', 'link', 'status', 'checkin']
    v = {}

    # Keep the feed in the order it was delivered: the time span below is
    # read from its first and last items, so it must not be taken from a
    # list that has been re-sorted by likes or comments.
    feed_raw = list(self.g.getFeed())

    # Most liked.
    feed_all = sorted(feed_raw, key=self._fieldCount)
    v['most_liked'] = ap(feed_all).getTopPosts(5)

    # Most commented (stable sort on top of the previous ordering).
    feed_all = sorted(
        feed_all, key=lambda post: self._fieldCount(post, 'comments'))
    v['most_comments'] = ap(feed_all).getTopPosts(3)

    # Rate.  NOTE(review): the last item is treated as the oldest and the
    # first as the newest, mirroring doHome -- confirm the API ordering.
    begin = getTime(feed_raw[-1]['updated_time'])
    end = getTime(feed_raw[0]['updated_time'])
    v['time'] = {
        'begin': str(begin),
        'end': str(end),
        'duration': (end - begin).total_seconds(),
    }

    # Posts per hour of day.
    posts_per_hour = [0] * 24
    for entry in feed_all:
        posts_per_hour[getTime(entry['updated_time']).hour] += 1
    v['time']['posts'] = posts_per_hour

    # Per-type counts and top posts.
    v['type'] = {}
    for kind in post_types:
        of_kind = sorted(
            ap(feed_all).getByKeyValue('type', kind), key=self._fieldCount)
        top = ap(of_kind).getTopPosts(3)
        v['type'][kind] = {'count': len(of_kind), 'top': top}

    # Per-friend rankings.  Each grouped entry exposes 'posts', 'count' and
    # 'likes'; the best entries are taken from the end of the list
    # (presumably groupByUid() orders ascending by post count).
    feed_uid = list(ap(feed_all).groupByUid())
    v['friend'] = {'active': [], 'liked': [], 'ratio': []}

    def top_five(entries):
        # Last five entries, best first; tolerates fewer than five.
        return list(reversed(entries[-5:]))

    def ratio(entry):
        # Likes per post, 0.0 when either figure is missing or zero.
        if not entry['likes'] or not entry['count']:
            return 0.0
        return entry['likes'] / float(entry['count'])

    for entry in top_five(feed_uid):
        obj = {'name': entry['posts'][0]['from']['name']}
        for kind in post_types:
            obj[kind + '_count'] = len(
                ap(entry['posts']).getByKeyValue('type', kind))
        obj['total_count'] = entry['count']
        v['friend']['active'].append(obj)

    feed_uid = sorted(feed_uid, key=lambda entry: entry['likes'])
    for entry in top_five(feed_uid):
        v['friend']['liked'].append({
            'name': entry['posts'][0]['from']['name'],
            'like_count': entry['likes'],
        })

    feed_uid = sorted(feed_uid, key=ratio)
    for entry in top_five(feed_uid):
        v['friend']['ratio'].append({
            'name': entry['posts'][0]['from']['name'],
            # Same guarded helper as the sort key, so an entry with no
            # likes or no posts cannot raise here.
            'like_ratio': ratio(entry),
            'like_count': entry['likes'],
            'post_count': entry['count'],
        })

    # Average status length.
    status_lengths = [
        len(entry['message']) for entry in feed_all
        if entry['type'] == 'status' and 'message' in entry
    ]
    if status_lengths:
        v['average_status_length'] = (
            float(sum(status_lengths)) / len(status_lengths))
    else:
        v['average_status_length'] = 0.0

    return v
def getProcessed(self):
    """Build the Patriots-vs-Giants summary of the home feed as a JSON string.

    Fetches posts with ``self.g.getHome(1000, False)``, orders them with
    ``self._fieldCount``, finds the posts and photos matching each team's
    keywords, archives the user, token and posts in MongoDB through
    ``self.db``, and returns ``json.dumps`` of a dict with three keys:

    * ``'patriots'`` / ``'giants'`` -- each a dict holding ``statuses``,
      ``photos``, ``users``, ``like_count`` and ``comment_count``.
    * ``'active_friends'`` -- ``{'count': <number of distinct ids>}``.
    """
    text_fields = ['message', 'link', 'name', 'caption', 'description']

    # Team names first, then star names and generic terms.  (Lists of every
    # active player's surname used to be appended as well; they are disabled.)
    pat_keywords = [
        ' pats', ' patriots',
        'brady', 'gronkowski', 'ochocinco', 'belichick', 'super bowl',
        'football',
    ]
    giant_keywords = ['giants', 'manning', 'coughlin', 'superbowl']

    # Order by the default _fieldCount (the original comment says "likes").
    all_posts = sorted(self.g.getHome(1000, False), key=self._fieldCount)

    # Split photos from the text-like post types.
    by_type = ap(all_posts)
    photos = by_type.getByKeyValue('type', 'photo')
    posts = by_type.getByKeyValue('type', ['status', 'link', 'checkin'])

    # Keyword matches.  Only the non-photo search passes the extra True flag.
    post_search = ap(posts)
    giant_posts = post_search.searchPosts(text_fields, giant_keywords, True)
    pat_posts = post_search.searchPosts(text_fields, pat_keywords, True)
    photo_search = ap(photos)
    giant_photos = photo_search.searchPosts(text_fields, giant_keywords)
    pat_photos = photo_search.searchPosts(text_fields, pat_keywords)

    giant_users = ap(giant_posts + giant_photos).groupByUid(False)
    pat_users = ap(pat_posts + pat_photos).groupByUid(False)

    def team_summary(statuses, pictures, users):
        # Per-team block of the response: the matches plus their totals.
        return {
            'statuses': statuses,
            'photos': pictures,
            'users': users,
            'like_count': (ap(pictures).countLikes()
                           + ap(statuses).countLikes()),
            'comment_count': (ap(pictures).countComments()
                              + ap(statuses).countComments()),
        }

    response = {}
    response['patriots'] = team_summary(pat_posts, pat_photos, pat_users)
    response['giants'] = team_summary(giant_posts, giant_photos, giant_users)

    # Active friends: everyone who posted about either team plus everyone
    # who commented on any fetched post, counted once each.
    active_ids = [user['id'] for user in pat_users]
    active_ids.extend(user['id'] for user in giant_users)
    for post in all_posts:
        if 'comments' in post and 'data' in post['comments']:
            active_ids.extend(
                comment['from']['id'] for comment in post['comments']['data'])
    response['active_friends'] = {'count': len(set(active_ids))}

    # Dump data into mongo.
    self.db.users.insert({
        'username': self.g.getUsername(),
        'data': self.g.getUser(),
    })
    self.db.tokens.insert({
        'username': self.g.getUsername(),
        'token': self.g.getToken(),
    })
    self.db.feed.insert({
        'username': self.g.getUsername(),
        'posts': all_posts,
    })

    return json.dumps(response)
from pymongo import Connection from ArrayProcessor import ArrayProcessor as ap posts = Connection().pspct.feed.find() posts = [p for p in posts] len(posts) all_posts = [] for p in posts: for apost in p['posts']: all_posts.append(apost) print len(all_posts) print sum([len(p['posts']) for p in posts]) len(posts) print max([len(p['posts']) for p in posts]) print min([len(p['posts']) for p in posts]) all_processor = ap(all_posts) all_processor.searchPosts( ['message', 'link', 'name', 'caption', 'description'], 'manning') keywords = [ ' pats', 'patriots', 'brady', 'gronkowski', 'belichick', 'super bowl', 'superbowl', 'football', 'giants', 'manning', 'coughlin', 'superbowl' ] def s(val): return len( all_processor.searchPosts( ['message', 'link', 'name', 'caption', 'description'], val, True)) for k in keywords:
#! /usr/bin/python
"""Ad-hoc report over the archived feeds in the ``pspct.feed`` collection.

Prints the total number of archived posts (computed two ways), the largest
and smallest per-document post count, and then one ``<keyword> <matches>``
line per search keyword.
"""
# NOTE(review): pymongo.Connection is the legacy client class; newer PyMongo
# releases only provide MongoClient -- confirm the installed version.
from pymongo import Connection

from ArrayProcessor import ArrayProcessor as ap

# Post fields that are searched for keywords.
SEARCH_FIELDS = ['message', 'link', 'name', 'caption', 'description']

# Each stored document carries one user's posts under 'posts'.
posts = list(Connection().pspct.feed.find())
all_posts = [apost for p in posts for apost in p['posts']]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
posts_per_doc = [len(p['posts']) for p in posts]
print(len(all_posts))
print(sum(posts_per_doc))
print(max(posts_per_doc))
print(min(posts_per_doc))

all_processor = ap(all_posts)
# NOTE(review): the result of this call is discarded; it is kept only in
# case searchPosts has side effects -- confirm and drop if it does not.
all_processor.searchPosts(SEARCH_FIELDS, 'manning')

# 'superbowl' used to be listed twice, which printed its count twice.
keywords = [
    ' pats', 'patriots', 'brady', 'gronkowski', 'belichick', 'super bowl',
    'superbowl', 'football', 'giants', 'manning', 'coughlin',
]


def s(val):
    """Return how many archived posts match the keyword list ``val``."""
    return len(all_processor.searchPosts(SEARCH_FIELDS, val, True))


for k in keywords:
    print('%s %s' % (k, s([k])))

#import readline
#readline.write_history_file('history.txt')