def getPartialInfluenceScore():
    """Rebuild the per-friend, per-cluster partial influence score lists.

    Steps:

    1. Drop the cluster-influencer collection and insert one document per
       friend, ``{'_id': friend['id'], 'cluster': {'0': [], '1': [], ...}}``,
       with one empty list per distinct ``cluster`` value found in the
       pages-cluster-info collection.
    2. For every page whose ``count`` is greater than 3, sort its ``people``
       by ``created_time``.  Each person's score is the number of likes on
       that page from their own position in the sorted list up to one week
       after their like.  The score is ``$push``-ed onto
       ``cluster.<page cluster>`` of the document whose ``_id`` is the
       person's ``id``.

    Updates are issued with ``upsert=False``.  A page that raises while being
    processed (for example a person without ``created_time``) is reported
    and skipped; the remaining pages are still processed.

    Returns:
        None.  Results are written to the cluster-influencer collection and
        a running count of processed pages is printed.
    """
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    # Start from an empty collection and seed one document per friend.
    clusterInfluencerCollection.drop()
    # NOTE(review): the seeded keys are "0".."n-1"; this presumably assumes
    # cluster labels are consecutive integers starting at 0 -- confirm.
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        clusterInfluencerCollection.insert({
            '_id': friend['id'],
            'cluster': {str(i): [] for i in range(clusterNumber)},
        })

    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # look-ahead window: one week, in seconds
    done = 0
    for page in clusterInfoCollection.find({"count": {"$gt": 3}}):
        users = page['people']
        try:
            users.sort(key=lambda x: x['created_time'])
            field = 'cluster.' + str(page['cluster'])
            # Like times as seconds since the epoch, in ascending order so
            # that bisect can be used on them.
            liketime = [(person['created_time'] - epoch).total_seconds()
                        for person in users]
            done += 1
            print(done)
            for position, (user, liked_at) in enumerate(zip(users, liketime)):
                # Likes at or before `liked_at + dt`, minus the likes sorted
                # before this user = likes from this user onward within the
                # window (the user's own like included).
                ahead = bisect.bisect_right(liketime, liked_at + dt)
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {field: ahead - position}},
                    upsert=False)
        except Exception as exc:
            # Best effort per page: report what failed and move on.  Unlike a
            # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
            print("skipping page %r: %r" % (page.get('_id'), exc))
def getPartialInfluenceScore():
    """Rebuild the per-friend, per-cluster partial influence score lists.

    Steps:

    1. Drop the cluster-influencer collection and insert one document per
       friend, ``{'_id': friend['id'], 'cluster': {'0': [], '1': [], ...}}``,
       with one empty list per distinct ``cluster`` value found in the
       pages-cluster-info collection.
    2. For every page whose ``count`` is greater than 3, sort its ``people``
       by ``created_time``.  Each person's score is the number of likes on
       that page from their own position in the sorted list up to one week
       after their like.  The score is ``$push``-ed onto
       ``cluster.<page cluster>`` of the document whose ``_id`` is the
       person's ``id``.

    Updates are issued with ``upsert=False``.  A page that raises while being
    processed (for example a person without ``created_time``) is reported
    and skipped; the remaining pages are still processed.

    Returns:
        None.  Results are written to the cluster-influencer collection and
        a running count of processed pages is printed.
    """
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    # Start from an empty collection and seed one document per friend.
    clusterInfluencerCollection.drop()
    # NOTE(review): the seeded keys are "0".."n-1"; this presumably assumes
    # cluster labels are consecutive integers starting at 0 -- confirm.
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        clusterInfluencerCollection.insert({
            '_id': friend['id'],
            'cluster': {str(i): [] for i in range(clusterNumber)},
        })

    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # look-ahead window: one week, in seconds
    done = 0
    for page in clusterInfoCollection.find({"count": {"$gt": 3}}):
        users = page['people']
        try:
            users.sort(key=lambda x: x['created_time'])
            field = 'cluster.' + str(page['cluster'])
            # Like times as seconds since the epoch, in ascending order so
            # that bisect can be used on them.
            liketime = [(person['created_time'] - epoch).total_seconds()
                        for person in users]
            done += 1
            print(done)
            for position, (user, liked_at) in enumerate(zip(users, liketime)):
                # Likes at or before `liked_at + dt`, minus the likes sorted
                # before this user = likes from this user onward within the
                # window (the user's own like included).
                ahead = bisect.bisect_right(liketime, liked_at + dt)
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {field: ahead - position}},
                    upsert=False)
        except Exception as exc:
            # Best effort per page: report what failed and move on.  Unlike a
            # bare ``except:``, this lets KeyboardInterrupt/SystemExit through.
            print("skipping page %r: %r" % (page.get('_id'), exc))
from database import (
    getPageCollection,
    getLikesCollection,
    getPagesClusterInfoCollection,
    getClusterCollection,
)
from pprint import pprint
import dateutil.parser as dateparser

# Script: rebuild the pages-cluster-info collection from scratch.
#
# For every page it stores one document
#   {'_id': <page _id>, 'people': [...], 'count': <len(people)>,
#    'cluster': <cluster value>}
# where `people` holds one {'id': ..., 'created_time': <datetime>} entry per
# likes document whose `data` array contains the page ('created_time' is only
# present when the matching like entry carries one).

allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

# The target collection is regenerated on every run.
fbpagesinfo.drop()

counter = 0
for pageId in allpages.find():
    # Despite its name, `pageId` is the whole page document; only its `_id`
    # is used below.
    page_key = pageId['_id']
    cursor = alllikes.find({'data': {'$elemMatch': {'id': page_key}}})

    # NOTE(review): find_one presumably returns None when no cluster document
    # lists this page, in which case the subscript raises TypeError --
    # confirm that every page is assigned to a cluster.
    cluster = clusterinfo.find_one({'pages': page_key})["cluster"]

    people = []
    for c in cursor:
        dd = {'id': c['id']}
        # Only the first `data` entry for this page is considered.
        like = next(
            (entry for entry in c['data'] if entry['id'] == page_key), None)
        if like is not None and 'created_time' in like:
            dd['created_time'] = dateparser.parse(like['created_time'])
        people.append(dd)

    # `count` is taken from the list just built rather than from a separate
    # cursor.count() query: it saves a round trip and guarantees that `count`
    # always agrees with `people`.
    document = {
        '_id': page_key,
        'people': people,
        'count': len(people),
        'cluster': cluster,
    }

    counter += 1
    # Single-argument print call: same output as the Python 2 statement
    # `print 'document', counter, 'done'`, and also valid on Python 3.
    print("%s %s %s" % ('document', counter, 'done'))
    fbpagesinfo.insert(document)
from database import (
    getPageCollection,
    getLikesCollection,
    getPagesClusterInfoCollection,
    getClusterCollection,
)
from pprint import pprint
import dateutil.parser as dateparser

# Script: drop the pages-cluster-info collection, then, for every page,
# assemble a document describing who liked it and which cluster it is in.
#
# NOTE(review): in the text visible here `document` is built but never
# stored and no progress is reported; the remainder of the script may lie
# outside this view -- confirm before relying on it.

allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

fbpagesinfo.drop()

counter = 0
for pageId in allpages.find():
    # `pageId` is the whole page document; only its `_id` is used.
    cursor = alllikes.find({'data': {'$elemMatch': {'id': pageId['_id']}}})
    cluster = clusterinfo.find_one({'pages': pageId['_id']})["cluster"]
    document = {
        '_id': pageId['_id'],
        'people': [],
        'count': cursor.count(),
        'cluster': cluster,
    }

    for c in cursor:
        dd = {'id': c['id']}
        # Only the first `data` entry for this page is considered.
        liked = next(
            (entry for entry in c['data'] if entry['id'] == pageId['_id']),
            None)
        if liked is not None and 'created_time' in liked:
            dd['created_time'] = dateparser.parse(liked['created_time'])
        document['people'].append(dd)