Ejemplo n.º 1
0
    def getProcessed(self):
        """Build Patriots-vs-Giants statistics and archive the raw data.

        Posts are fetched with ``self.g.getHome(1000, False)``, split into
        photos and status/link/checkin posts, and searched for each team's
        keywords.  Like/comment totals and the number of distinct active
        friends are then computed, and the user record, token and fetched
        posts are inserted into MongoDB through ``self.db``.

        Returns:
            str: JSON document with the keys ``patriots`` and ``giants``
            (each holding ``statuses``, ``photos``, ``users``,
            ``like_count`` and ``comment_count``) plus ``active_friends``.
        """
        all_posts = self.g.getHome(1000, False)

        # Team terms first, then star players / coach / generic terms.  The
        # leading space in ' pats' and ' patriots' is part of the keyword.
        pat_keywords = [
            ' pats', ' patriots',
            'brady', 'gronkowski', 'ochocinco', 'belichick', 'super bowl',
            'football',
        ]
        giant_keywords = ['giants', 'manning', 'coughlin', 'superbowl']
        # NOTE: commented-out full-roster keyword lists for both teams used to
        # sit here; they were never active and have been dropped.

        # Ascending sort on self._fieldCount's default field (the original
        # comment reads "sort by likes").  The former `lambda (i): ...` is
        # Python-2-only syntax; passing the bound method is equivalent and
        # valid on both Python 2 and 3.
        all_posts = sorted(all_posts, key=self._fieldCount)

        # Segregate by type.
        pr = ap(all_posts)
        photos = pr.getByKeyValue('type', 'photo')
        posts = pr.getByKeyValue('type', ['status', 'link', 'checkin'])

        # Text fields searched for keyword matches.
        fields = ['message', 'link', 'name', 'caption', 'description']

        # NOTE(review): the third positional argument (True) is passed only
        # for non-photo posts; its meaning is defined by
        # ArrayProcessor.searchPosts -- confirm this asymmetry is intended.
        pr_posts = ap(posts)
        giant_posts = pr_posts.searchPosts(fields, giant_keywords, True)
        pat_posts = pr_posts.searchPosts(fields, pat_keywords, True)

        pr_photos = ap(photos)
        giant_photos = pr_photos.searchPosts(fields, giant_keywords)
        pat_photos = pr_photos.searchPosts(fields, pat_keywords)

        giant_users = ap(giant_posts + giant_photos).groupByUid(False)
        pat_users = ap(pat_posts + pat_photos).groupByUid(False)

        def team_stats(statuses, team_photos, users):
            """Assemble the per-team section of the response."""
            return {
                'statuses': statuses,
                'photos': team_photos,
                'users': users,
                'like_count': (ap(team_photos).countLikes() +
                               ap(statuses).countLikes()),
                'comment_count': (ap(team_photos).countComments() +
                                  ap(statuses).countComments()),
            }

        response = {
            'patriots': team_stats(pat_posts, pat_photos, pat_users),
            'giants': team_stats(giant_posts, giant_photos, giant_users),
        }

        # Distinct ids of everyone who posted about either team or commented
        # on any fetched post.  A set gives the distinct count directly.
        active_friends = set(u['id'] for u in pat_users)
        active_friends.update(u['id'] for u in giant_users)
        for p in all_posts:
            if 'comments' in p and 'data' in p['comments']:
                active_friends.update(
                    c['from']['id'] for c in p['comments']['data'])

        response['active_friends'] = {'count': len(active_friends)}

        # Dump data into mongo.  The username is fetched once and reused.
        # NOTE(review): the access token is stored as returned by getToken().
        username = self.g.getUsername()
        self.db.users.insert({
            'username': username,
            'data': self.g.getUser()
        })
        self.db.tokens.insert({
            'username': username,
            'token': self.g.getToken()
        })
        self.db.feed.insert({
            'username': username,
            'posts': all_posts
        })

        return json.dumps(response)
Ejemplo n.º 2
0
    def doFeed(self):
        """Compute summary statistics for the posts from ``self.g.getFeed()``.

        Returns:
            dict: with the keys ``most_liked``, ``most_comments``, ``time``
            (``begin``, ``end``, ``duration`` in seconds and a 24-slot
            ``posts`` histogram indexed by hour), ``type`` (count and top
            posts per post type), ``friend`` (``active`` / ``liked`` /
            ``ratio`` rankings with at most five entries each) and
            ``average_status_length``.
        """
        post_types = ['photo', 'link', 'status', 'checkin']
        v = {}

        # Keep the posts in fetched order: the time span below is read from
        # the first and last fetched posts, so it must not see a re-sorted
        # list (the original read it after sorting by comment count).
        fetched = list(self.g.getFeed())

        # Most liked: ascending sort on _fieldCount's default field.
        # (`lambda (i): ...` was Python-2-only syntax.)
        feed_all = sorted(fetched, key=self._fieldCount)
        v['most_liked'] = ap(feed_all).getTopPosts(5)

        # Most commented: re-sort the like-ordered list so that ties keep
        # their like ordering (sorted() is stable).
        feed_all = sorted(
            feed_all, key=lambda post: self._fieldCount(post, 'comments'))
        v['most_comments'] = ap(feed_all).getTopPosts(3)

        # RATE
        # Assumes the API returns newest-first (last fetched post is the
        # oldest), as doHome does -- TODO confirm.
        if fetched:
            begin = getTime(fetched[-1]['updated_time'])
            end = getTime(fetched[0]['updated_time'])
            v['time'] = {
                'begin': str(begin),
                'end': str(end),
                'duration': (end - begin).total_seconds()
            }
        else:
            # Empty feed: no span to report.
            v['time'] = {'begin': None, 'end': None, 'duration': 0.0}

        # BY TIME OF DAY: number of posts per hour 0..23.
        posts_per_hour = [0] * 24
        for e in feed_all:
            posts_per_hour[getTime(e['updated_time']).hour] += 1
        v['time']['posts'] = posts_per_hour

        # TYPE METRICS COUNTS
        v['type'] = {}
        for s in post_types:
            feed_filtered = sorted(
                ap(feed_all).getByKeyValue('type', s), key=self._fieldCount)
            v['type'][s] = {
                'count': len(feed_filtered),
                'top': ap(feed_filtered).getTopPosts(3)
            }

        # BY FRIEND
        feed_uid = ap(feed_all).groupByUid()
        v['friend'] = {'active': [], 'liked': [], 'ratio': []}

        def top_five(groups):
            """Return the last five groups, last one first (fewer if short)."""
            return groups[-5:][::-1]

        def ratio(friend):
            """Likes per post; 0.0 when there are no likes or no posts."""
            if not friend['likes'] or not friend['count']:
                return 0.0
            return friend['likes'] / float(friend['count'])

        # Most active friends: relies on the order groupByUid() returns
        # (presumably ascending by post count -- TODO confirm).
        for friend in top_five(feed_uid):
            obj = {'name': friend['posts'][0]['from']['name']}
            for s in post_types:
                obj[s + '_count'] = len(
                    ap(friend['posts']).getByKeyValue('type', s))
            obj['total_count'] = friend['count']
            v['friend']['active'].append(obj)

        # Most overall likes by friend.
        feed_uid = sorted(feed_uid, key=lambda friend: friend['likes'])
        for friend in top_five(feed_uid):
            v['friend']['liked'].append({
                'name': friend['posts'][0]['from']['name'],
                'like_count': friend['likes']
            })

        # Highest like ratio of a friend.  The reported ratio reuses the
        # guarded helper so a zero post count cannot raise ZeroDivisionError.
        feed_uid = sorted(feed_uid, key=ratio)
        for friend in top_five(feed_uid):
            v['friend']['ratio'].append({
                'name': friend['posts'][0]['from']['name'],
                'like_ratio': ratio(friend),
                'like_count': friend['likes'],
                'post_count': friend['count']
            })

        # Average length of status messages; 0.0 when there are none.
        status_lengths = [
            len(e['message']) for e in feed_all
            if e['type'] == 'status' and 'message' in e
        ]
        if status_lengths:
            v['average_status_length'] = (
                float(sum(status_lengths)) / len(status_lengths))
        else:
            v['average_status_length'] = 0.0
        return v
Ejemplo n.º 3
0
    def doHome(self):
        """Compute summary statistics for the posts from ``self.g.getHome()``.

        Returns:
            dict: with the keys ``tebows``, ``presidents``, ``most_liked``,
            ``most_comments``, ``time`` (``begin``, ``end``, ``duration`` in
            seconds and a 24-slot ``posts`` histogram indexed by hour),
            ``type`` (count and top posts per non-empty post type),
            ``friend`` (``active`` / ``liked`` / ``ratio`` rankings with at
            most five entries each) and ``average_status_length``.
        """
        post_types = ['photo', 'link', 'status', 'checkin']
        v = {}

        home_all = self.g.getHome()

        # TEBOWS: 'ebow' is matched against the raw message, so it covers
        # both "Tebow" and "tebow".
        ebows = [
            e for e in home_all if 'message' in e and 'ebow' in e['message']
        ]
        v['tebows'] = {'tebow_count': len(ebows), 'all_count': len(home_all)}

        # PRESIDENTS: bucket name -> substrings looked up in the lower-cased
        # message.  Fixes over the original: Obama matches went into the
        # 'colbert' bucket, the 'romney' bucket was never filled, and only the
        # misspelling 'barrack' was searched for.
        # NOTE(review): short terms such as 'ron', 'paul' and 'newt' are plain
        # substring matches and will also hit unrelated words.
        candidate_terms = [
            ('gingrich', ('newt', 'gingrich')),
            ('romney', ('romney', )),
            ('santorum', ('santorum', )),
            ('paul', ('ron', 'paul')),
            ('obama', ('obama', 'barack', 'barrack')),
            ('colbert', ('colbert', )),
        ]
        pres = dict((name, []) for name, _ in candidate_terms)

        for e in home_all:
            if 'message' not in e:
                continue
            msg = e['message'].lower()
            obj = {'message': e['message'], 'name': e['from']['name']}
            for name, terms in candidate_terms:
                if any(term in msg for term in terms):
                    pres[name].append(obj)

        v['presidents'] = pres

        # FRIENDS: keep only posts whose author has no 'category' key.
        home_friends = [e for e in home_all if 'category' not in e['from']]

        # Most liked: ascending sort on _fieldCount's default field.
        # (`lambda (i): ...` was Python-2-only syntax.)
        home_friends = sorted(home_friends, key=self._fieldCount)
        v['most_liked'] = ap(home_friends).getTopPosts(3)

        # Most commented.
        home_friends = sorted(
            home_friends,
            key=lambda post: self._fieldCount(post, 'comments'))
        v['most_comments'] = ap(home_friends).getTopPosts(3)

        # RATE
        # Assumes the API returns newest-first (last post is the oldest) --
        # TODO confirm.
        if home_all:
            begin = getTime(home_all[-1]['updated_time'])
            end = getTime(home_all[0]['updated_time'])
            v['time'] = {
                'begin': str(begin),
                'end': str(end),
                'duration': (end - begin).total_seconds()
            }
        else:
            # Empty stream: no span to report.
            v['time'] = {'begin': None, 'end': None, 'duration': 0.0}

        # BY TIME OF DAY: number of posts per hour 0..23.
        posts_per_hour = [0] * 24
        for e in home_all:
            posts_per_hour[getTime(e['updated_time']).hour] += 1
        v['time']['posts'] = posts_per_hour

        # TYPE METRICS COUNTS (types without posts are omitted).
        v['type'] = {}
        for s in post_types:
            home_filtered = sorted(
                ap(home_friends).getByKeyValue('type', s),
                key=self._fieldCount)
            if not home_filtered:
                continue
            v['type'][s] = {
                'count': len(home_filtered),
                'top': ap(home_filtered).getTopPosts(3)
            }

        # BY FRIEND
        home_uid = ap(home_friends).groupByUid()
        v['friend'] = {'active': [], 'liked': [], 'ratio': []}

        def top_five(groups):
            """Return the last five groups, last one first (fewer if short)."""
            return groups[-5:][::-1]

        def ratio(friend):
            """Likes per post; 0.0 when there are no likes or no posts."""
            if not friend['likes'] or not friend['count']:
                return 0.0
            return friend['likes'] / float(friend['count'])

        # Most active friends: relies on the order groupByUid() returns
        # (presumably ascending by post count -- TODO confirm).
        for friend in top_five(home_uid):
            obj = {'name': friend['posts'][0]['from']['name']}
            for s in post_types:
                obj[s + '_count'] = len(
                    ap(friend['posts']).getByKeyValue('type', s))
            obj['total_count'] = friend['count']
            v['friend']['active'].append(obj)

        # Most overall likes by friend.
        home_uid = sorted(home_uid, key=lambda friend: friend['likes'])
        for friend in top_five(home_uid):
            v['friend']['liked'].append({
                'name': friend['posts'][0]['from']['name'],
                'like_count': friend['likes']
            })

        # Highest like ratio of a friend.  The reported ratio reuses the
        # guarded helper so a zero post count cannot raise ZeroDivisionError.
        home_uid = sorted(home_uid, key=ratio)
        for friend in top_five(home_uid):
            v['friend']['ratio'].append({
                'name': friend['posts'][0]['from']['name'],
                'like_ratio': ratio(friend),
                'like_count': friend['likes'],
                'post_count': friend['count']
            })

        # Average length of status messages; 0.0 when there are none.
        status_lengths = [
            len(e['message']) for e in home_all
            if e['type'] == 'status' and 'message' in e
        ]
        if status_lengths:
            v['average_status_length'] = (
                float(sum(status_lengths)) / len(status_lengths))
        else:
            v['average_status_length'] = 0.0

        return v
Ejemplo n.º 4
0
	def doHome(self):
		"""Compute summary statistics for the posts from ``self.g.getHome()``.

		Returns:
			dict: with the keys ``tebows``, ``presidents``, ``most_liked``,
			``most_comments``, ``time`` (``begin``, ``end``, ``duration`` in
			seconds and a 24-slot ``posts`` histogram indexed by hour),
			``type`` (count and top posts per non-empty post type),
			``friend`` (``active`` / ``liked`` / ``ratio`` rankings with at
			most five entries each) and ``average_status_length``.
		"""
		post_types = ['photo', 'link', 'status', 'checkin']
		v = {}

		home_all = self.g.getHome()

		# TEBOWS: 'ebow' is matched against the raw message, so it covers
		# both "Tebow" and "tebow".
		ebows = [e for e in home_all if 'message' in e and 'ebow' in e['message']]
		v['tebows'] = {'tebow_count': len(ebows), 'all_count': len(home_all)}

		# PRESIDENTS: bucket name -> substrings looked up in the lower-cased
		# message.  Fixes over the original: Obama matches went into the
		# 'colbert' bucket, the 'romney' bucket was never filled, and only the
		# misspelling 'barrack' was searched for.
		# NOTE(review): short terms such as 'ron', 'paul' and 'newt' are plain
		# substring matches and will also hit unrelated words.
		candidate_terms = [
			('gingrich', ('newt', 'gingrich')),
			('romney', ('romney',)),
			('santorum', ('santorum',)),
			('paul', ('ron', 'paul')),
			('obama', ('obama', 'barack', 'barrack')),
			('colbert', ('colbert',)),
		]
		pres = dict((name, []) for name, _ in candidate_terms)

		for e in home_all:
			if 'message' not in e:
				continue
			msg = e['message'].lower()
			obj = {'message': e['message'], 'name': e['from']['name']}
			for name, terms in candidate_terms:
				if any(term in msg for term in terms):
					pres[name].append(obj)

		v['presidents'] = pres

		# FRIENDS: keep only posts whose author has no 'category' key.
		home_friends = [e for e in home_all if 'category' not in e['from']]

		# Most liked: ascending sort on _fieldCount's default field.
		# (`lambda(i): ...` was Python-2-only syntax.)
		home_friends = sorted(home_friends, key=self._fieldCount)
		v['most_liked'] = ap(home_friends).getTopPosts(3)

		# Most commented.
		home_friends = sorted(home_friends, key=lambda post: self._fieldCount(post, 'comments'))
		v['most_comments'] = ap(home_friends).getTopPosts(3)

		# RATE
		# Assumes the API returns newest-first (last post is the oldest) --
		# TODO confirm.
		if home_all:
			begin = getTime(home_all[-1]['updated_time'])
			end = getTime(home_all[0]['updated_time'])
			v['time'] = {'begin': str(begin), 'end': str(end), 'duration': (end - begin).total_seconds()}
		else:
			# Empty stream: no span to report.
			v['time'] = {'begin': None, 'end': None, 'duration': 0.0}

		# BY TIME OF DAY: number of posts per hour 0..23.
		posts_per_hour = [0] * 24
		for e in home_all:
			posts_per_hour[getTime(e['updated_time']).hour] += 1
		v['time']['posts'] = posts_per_hour

		# TYPE METRICS COUNTS (types without posts are omitted).
		v['type'] = {}
		for s in post_types:
			home_filtered = sorted(ap(home_friends).getByKeyValue('type', s), key=self._fieldCount)
			if not home_filtered:
				continue
			v['type'][s] = {'count': len(home_filtered), 'top': ap(home_filtered).getTopPosts(3)}

		# BY FRIEND
		home_uid = ap(home_friends).groupByUid()
		v['friend'] = {'active': [], 'liked': [], 'ratio': []}

		def top_five(groups):
			"""Return the last five groups, last one first (fewer if short)."""
			return groups[-5:][::-1]

		def ratio(friend):
			"""Likes per post; 0.0 when there are no likes or no posts."""
			if not friend['likes'] or not friend['count']:
				return 0.0
			return friend['likes'] / float(friend['count'])

		# Most active friends: relies on the order groupByUid() returns
		# (presumably ascending by post count -- TODO confirm).
		for friend in top_five(home_uid):
			obj = {'name': friend['posts'][0]['from']['name']}
			for s in post_types:
				obj[s + '_count'] = len(ap(friend['posts']).getByKeyValue('type', s))
			obj['total_count'] = friend['count']
			v['friend']['active'].append(obj)

		# Most overall likes by friend.
		home_uid = sorted(home_uid, key=lambda friend: friend['likes'])
		for friend in top_five(home_uid):
			v['friend']['liked'].append({'name': friend['posts'][0]['from']['name'], 'like_count': friend['likes']})

		# Highest like ratio of a friend.  The reported ratio reuses the
		# guarded helper so a zero post count cannot raise ZeroDivisionError.
		home_uid = sorted(home_uid, key=ratio)
		for friend in top_five(home_uid):
			v['friend']['ratio'].append({
				'name': friend['posts'][0]['from']['name'],
				'like_ratio': ratio(friend),
				'like_count': friend['likes'],
				'post_count': friend['count'],
			})

		# Average length of status messages; 0.0 when there are none.
		status_lengths = [len(e['message']) for e in home_all if e['type'] == 'status' and 'message' in e]
		if status_lengths:
			v['average_status_length'] = float(sum(status_lengths)) / len(status_lengths)
		else:
			v['average_status_length'] = 0.0

		return v
Ejemplo n.º 5
0
	def doFeed(self):
		"""Compute summary statistics for the posts from ``self.g.getFeed()``.

		Returns:
			dict: with the keys ``most_liked``, ``most_comments``, ``time``
			(``begin``, ``end``, ``duration`` in seconds and a 24-slot
			``posts`` histogram indexed by hour), ``type`` (count and top
			posts per post type), ``friend`` (``active`` / ``liked`` /
			``ratio`` rankings with at most five entries each) and
			``average_status_length``.
		"""
		post_types = ['photo', 'link', 'status', 'checkin']
		v = {}

		# Keep the posts in fetched order: the time span below is read from
		# the first and last fetched posts, so it must not see a re-sorted
		# list (the original read it after sorting by comment count).
		fetched = list(self.g.getFeed())

		# Most liked: ascending sort on _fieldCount's default field.
		# (`lambda(i): ...` was Python-2-only syntax.)
		feed_all = sorted(fetched, key=self._fieldCount)
		v['most_liked'] = ap(feed_all).getTopPosts(5)

		# Most commented: re-sort the like-ordered list so that ties keep
		# their like ordering (sorted() is stable).
		feed_all = sorted(feed_all, key=lambda post: self._fieldCount(post, 'comments'))
		v['most_comments'] = ap(feed_all).getTopPosts(3)

		# RATE
		# Assumes the API returns newest-first (last fetched post is the
		# oldest), as doHome does -- TODO confirm.
		if fetched:
			begin = getTime(fetched[-1]['updated_time'])
			end = getTime(fetched[0]['updated_time'])
			v['time'] = {'begin': str(begin), 'end': str(end), 'duration': (end - begin).total_seconds()}
		else:
			# Empty feed: no span to report.
			v['time'] = {'begin': None, 'end': None, 'duration': 0.0}

		# BY TIME OF DAY: number of posts per hour 0..23.
		posts_per_hour = [0] * 24
		for e in feed_all:
			posts_per_hour[getTime(e['updated_time']).hour] += 1
		v['time']['posts'] = posts_per_hour

		# TYPE METRICS COUNTS
		v['type'] = {}
		for s in post_types:
			feed_filtered = sorted(ap(feed_all).getByKeyValue('type', s), key=self._fieldCount)
			v['type'][s] = {'count': len(feed_filtered), 'top': ap(feed_filtered).getTopPosts(3)}

		# BY FRIEND
		feed_uid = ap(feed_all).groupByUid()
		v['friend'] = {'active': [], 'liked': [], 'ratio': []}

		def top_five(groups):
			"""Return the last five groups, last one first (fewer if short)."""
			return groups[-5:][::-1]

		def ratio(friend):
			"""Likes per post; 0.0 when there are no likes or no posts."""
			if not friend['likes'] or not friend['count']:
				return 0.0
			return friend['likes'] / float(friend['count'])

		# Most active friends: relies on the order groupByUid() returns
		# (presumably ascending by post count -- TODO confirm).
		for friend in top_five(feed_uid):
			obj = {'name': friend['posts'][0]['from']['name']}
			for s in post_types:
				obj[s + '_count'] = len(ap(friend['posts']).getByKeyValue('type', s))
			obj['total_count'] = friend['count']
			v['friend']['active'].append(obj)

		# Most overall likes by friend.
		feed_uid = sorted(feed_uid, key=lambda friend: friend['likes'])
		for friend in top_five(feed_uid):
			v['friend']['liked'].append({'name': friend['posts'][0]['from']['name'], 'like_count': friend['likes']})

		# Highest like ratio of a friend.  The reported ratio reuses the
		# guarded helper so a zero post count cannot raise ZeroDivisionError.
		feed_uid = sorted(feed_uid, key=ratio)
		for friend in top_five(feed_uid):
			v['friend']['ratio'].append({
				'name': friend['posts'][0]['from']['name'],
				'like_ratio': ratio(friend),
				'like_count': friend['likes'],
				'post_count': friend['count'],
			})

		# Average length of status messages; 0.0 when there are none.
		status_lengths = [len(e['message']) for e in feed_all if e['type'] == 'status' and 'message' in e]
		if status_lengths:
			v['average_status_length'] = float(sum(status_lengths)) / len(status_lengths)
		else:
			v['average_status_length'] = 0.0
		return v
Ejemplo n.º 6
0
	def getProcessed(self):
		"""Build Patriots-vs-Giants statistics and archive the raw data.

		Posts are fetched with ``self.g.getHome(1000, False)``, split into
		photos and status/link/checkin posts, and searched for each team's
		keywords.  Like/comment totals and the number of distinct active
		friends are then computed, and the user record, token and fetched
		posts are inserted into MongoDB through ``self.db``.

		Returns:
			str: JSON document with the keys ``patriots`` and ``giants``
			(each holding ``statuses``, ``photos``, ``users``,
			``like_count`` and ``comment_count``) plus ``active_friends``.
		"""
		all_posts = self.g.getHome(1000, False)

		# Team terms first, then star players / coach / generic terms.  The
		# leading space in ' pats' and ' patriots' is part of the keyword.
		pat_keywords = [
			' pats', ' patriots',
			'brady', 'gronkowski', 'ochocinco', 'belichick', 'super bowl', 'football',
		]
		giant_keywords = ['giants', 'manning', 'coughlin', 'superbowl']
		# NOTE: commented-out full-roster keyword lists for both teams used to
		# sit here; they were never active and have been dropped.

		# Ascending sort on self._fieldCount's default field (the original
		# comment reads "sort by likes").  The former `lambda(i): ...` is
		# Python-2-only syntax; passing the bound method is equivalent and
		# valid on both Python 2 and 3.
		all_posts = sorted(all_posts, key=self._fieldCount)

		# Segregate by type.
		pr = ap(all_posts)
		photos = pr.getByKeyValue('type', 'photo')
		posts = pr.getByKeyValue('type', ['status', 'link', 'checkin'])

		# Text fields searched for keyword matches.
		fields = ['message', 'link', 'name', 'caption', 'description']

		# NOTE(review): the third positional argument (True) is passed only
		# for non-photo posts; its meaning is defined by
		# ArrayProcessor.searchPosts -- confirm this asymmetry is intended.
		pr_posts = ap(posts)
		giant_posts = pr_posts.searchPosts(fields, giant_keywords, True)
		pat_posts = pr_posts.searchPosts(fields, pat_keywords, True)

		pr_photos = ap(photos)
		giant_photos = pr_photos.searchPosts(fields, giant_keywords)
		pat_photos = pr_photos.searchPosts(fields, pat_keywords)

		giant_users = ap(giant_posts + giant_photos).groupByUid(False)
		pat_users = ap(pat_posts + pat_photos).groupByUid(False)

		def team_stats(statuses, team_photos, users):
			"""Assemble the per-team section of the response."""
			return {
				'statuses': statuses,
				'photos': team_photos,
				'users': users,
				'like_count': ap(team_photos).countLikes() + ap(statuses).countLikes(),
				'comment_count': ap(team_photos).countComments() + ap(statuses).countComments(),
			}

		response = {
			'patriots': team_stats(pat_posts, pat_photos, pat_users),
			'giants': team_stats(giant_posts, giant_photos, giant_users),
		}

		# Distinct ids of everyone who posted about either team or commented
		# on any fetched post.  A set gives the distinct count directly.
		active_friends = set(u['id'] for u in pat_users)
		active_friends.update(u['id'] for u in giant_users)
		for p in all_posts:
			if 'comments' in p and 'data' in p['comments']:
				active_friends.update(c['from']['id'] for c in p['comments']['data'])

		response['active_friends'] = {'count': len(active_friends)}

		# Dump data into mongo.  The username is fetched once and reused.
		# NOTE(review): the access token is stored as returned by getToken().
		username = self.g.getUsername()
		self.db.users.insert({'username': username, 'data': self.g.getUser()})
		self.db.tokens.insert({'username': username, 'token': self.g.getToken()})
		self.db.feed.insert({'username': username, 'posts': all_posts})

		return json.dumps(response)
Ejemplo n.º 7
0
from pymongo import Connection
from ArrayProcessor import ArrayProcessor as ap

# Load every document of the `feed` collection in the `pspct` database; each
# document carries a 'posts' array.
posts = list(Connection().pspct.feed.find())

# Flatten the per-document 'posts' arrays into one list.
all_posts = [apost for p in posts for apost in p['posts']]
# print(x) with a single argument behaves the same on Python 2 and 3.
print(len(all_posts))

# Per-document sizes; their total equals len(all_posts) printed above.
posts_per_doc = [len(p['posts']) for p in posts]
print(sum(posts_per_doc))
if posts_per_doc:
    # max()/min() raise ValueError on an empty sequence.
    print(max(posts_per_doc))
    print(min(posts_per_doc))

search_fields = ['message', 'link', 'name', 'caption', 'description']
all_processor = ap(all_posts)
# NOTE(review): the result is discarded and the keyword is a bare string
# rather than a list as in s() below; kept in case searchPosts has side
# effects -- confirm and remove if not.
all_processor.searchPosts(search_fields, 'manning')
# NOTE(review): 'superbowl' is listed twice.
keywords = [
    ' pats', 'patriots', 'brady', 'gronkowski', 'belichick', 'super bowl',
    'superbowl', 'football', 'giants', 'manning', 'coughlin', 'superbowl'
]


def s(val):
    """Return the number of posts ``searchPosts`` yields for keywords *val*."""
    return len(all_processor.searchPosts(search_fields, val, True))


for k in keywords:
Ejemplo n.º 8
0
#! /usr/bin/python
from pymongo import Connection
from ArrayProcessor import ArrayProcessor as ap

# Load every document of the `feed` collection in the `pspct` database; each
# document carries a 'posts' array.
posts = list(Connection().pspct.feed.find())

# Flatten the per-document 'posts' arrays into one list.
all_posts = [apost for p in posts for apost in p['posts']]
# print(x) with a single argument behaves the same on Python 2 and 3.
print(len(all_posts))

# Per-document sizes; their total equals len(all_posts) printed above.
posts_per_doc = [len(p['posts']) for p in posts]
print(sum(posts_per_doc))
if posts_per_doc:
    # max()/min() raise ValueError on an empty sequence.
    print(max(posts_per_doc))
    print(min(posts_per_doc))

search_fields = ['message', 'link', 'name', 'caption', 'description']
all_processor = ap(all_posts)
# NOTE(review): the result is discarded and the keyword is a bare string
# rather than a list as in s() below; kept in case searchPosts has side
# effects -- confirm and remove if not.
all_processor.searchPosts(search_fields, 'manning')
# NOTE(review): 'superbowl' is listed twice, so its count is printed twice.
keywords = [' pats', 'patriots', 'brady', 'gronkowski', 'belichick', 'super bowl', 'superbowl', 'football', 'giants', 'manning', 'coughlin', 'superbowl']


def s(val):
    """Return the number of posts ``searchPosts`` yields for keywords *val*."""
    return len(all_processor.searchPosts(search_fields, val, True))


# One "<keyword> <match count>" line per keyword; same output as the former
# `print k,` / `print s([k])` pair.
for k in keywords:
    print('%s %s' % (k, s([k])))

#import readline
#readline.write_history_file('history.txt')