def getFullInfluenceScore():
    """Combine per-cluster score averages with cluster contribution weights.

    For every document in the friends collection, the friend's partial-score
    document and contribution document are looked up by ``_id``. One value is
    produced per entry of ``contribution['contribution']``: the mean of the
    friend's scores stored under ``partial['cluster'][str(index)]`` (0.0 when
    that list is empty) multiplied by the contribution weight at the same
    index. When the resulting list is non-empty, a document with ``_id``,
    ``name`` and ``score`` is inserted into the full-score collection and the
    friend's name and scores are printed.
    """
    # NOTE(review): a same-named definition follows this one in the file; the
    # later definition is the one bound at import time.
    partialScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    fbfriends = getFriendsCollection()
    fullScoreCollection = getFullInfluenceScoreCollection()

    for friend in fbfriends.find():
        fid = friend['id']
        partial = partialScoreCollection.find_one({'_id': fid})
        contribution = userClusterContributionCollection.find_one({'_id': fid})

        score = []
        for index, weight in enumerate(contribution['contribution']):
            values = partial['cluster'][str(index)]
            # float() forces true division: the stored scores are integers,
            # and int / int floors under Python 2, truncating the mean.
            mean = float(sum(values)) / len(values) if values else 0.0
            score.append(mean * weight)

        if score:
            document = {
                '_id': fid,
                'name': friend['name'],
                'score': score,
            }
            fullScoreCollection.insert(document)
            print('%s %s' % (friend['name'], score))
def getFullInfluenceScore():
    """Combine per-cluster score averages with cluster contribution weights.

    For every document in the friends collection, the friend's partial-score
    document and contribution document are looked up by ``_id``. One value is
    produced per entry of ``contribution['contribution']``: the mean of the
    friend's scores stored under ``partial['cluster'][str(index)]`` (0.0 when
    that list is empty) multiplied by the contribution weight at the same
    index. When the resulting list is non-empty, a document with ``_id``,
    ``name`` and ``score`` is inserted into the full-score collection and the
    friend's name and scores are printed.
    """
    # NOTE(review): a same-named definition precedes this one in the file;
    # this later definition is the one bound at import time.
    partialScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    fbfriends = getFriendsCollection()
    fullScoreCollection = getFullInfluenceScoreCollection()

    for friend in fbfriends.find():
        fid = friend['id']
        partial = partialScoreCollection.find_one({'_id': fid})
        contribution = userClusterContributionCollection.find_one({'_id': fid})

        score = []
        for index, weight in enumerate(contribution['contribution']):
            values = partial['cluster'][str(index)]
            # float() forces true division: the stored scores are integers,
            # and int / int floors under Python 2, truncating the mean.
            mean = float(sum(values)) / len(values) if values else 0.0
            score.append(mean * weight)

        if score:
            document = {
                '_id': fid,
                'name': friend['name'],
                'score': score,
            }
            fullScoreCollection.insert(document)
            print('%s %s' % (friend['name'], score))
def getClusterContribution():
    """Store, per user, the share of their scores that falls in each cluster.

    Drops the user-cluster-contribution collection, then for every document
    in the cluster influencer score collection inserts a document with the
    same ``_id`` and a ``contribution`` list. Entry ``i`` of that list is the
    number of scores in the i-th cluster divided by the user's total number
    of scores. The list is empty when the user has no scores at all. The
    value returned by each insert is printed.
    """
    # NOTE(review): a same-named definition appears later in the file; the
    # later definition is the one bound at import time.

    def cluster_order(label):
        # Cluster keys are strings. Plain sorting puts '10' before '2', which
        # misaligns contribution[i] with cluster str(i); order numeric labels
        # by value and keep any non-numeric ones after them.
        try:
            return (0, int(label), label)
        except ValueError:
            return (1, 0, label)

    clusterInfluencerScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    userClusterContributionCollection.drop()

    for scores in clusterInfluencerScoreCollection.find():
        clusters = scores['cluster']
        counts = [len(clusters[label])
                  for label in sorted(clusters, key=cluster_order)]
        # Sum once instead of re-summing the list for every element.
        total = sum(counts)
        if total > 0:
            shares = [count * 1.0 / total for count in counts]
        else:
            shares = []

        document = {'_id': scores['_id'], 'contribution': shares}
        inserted = userClusterContributionCollection.insert(document)
        print('Calculated for %s' % (inserted,))
def getPartialInfluenceScore():
    """Rebuild the per-user, per-cluster partial influence scores.

    Drops the cluster influencer score collection and seeds it with one
    document per friend, holding an empty list under each key ``'0'`` ..
    ``str(clusterNumber - 1)``, where ``clusterNumber`` is the number of
    distinct ``cluster`` values in the pages cluster-info collection.

    Then, for every page whose ``count`` is greater than 3, the page's
    ``people`` are sorted by ``created_time``. Each person's score is the
    number of people at or after their position in that order (themselves
    included) whose ``created_time`` is at most seven days later. The score
    is pushed onto the person's list for the page's cluster; the update does
    not upsert, so people without a seeded document are left out.

    A running count of processed pages is printed. A page that raises an
    ordinary exception prints ``hmmm`` and is skipped.
    """
    # NOTE(review): a same-named definition follows this one in the file; the
    # later definition is the one bound at import time.
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    clusterInfluencerCollection.drop()
    # NOTE(review): seeding keys 0..n-1 assumes cluster labels are exactly
    # those integers -- confirm against the clustering step.
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        document = {
            '_id': friend['id'],
            'cluster': {str(i): [] for i in range(clusterNumber)},
        }
        clusterInfluencerCollection.insert(document)

    pagesCursor = clusterInfoCollection.find({"count": {"$gt": 3}})
    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # look-ahead window: seven days, in seconds
    done = 0
    for page in pagesCursor:
        users = page['people']
        try:
            users.sort(key=lambda x: x['created_time'])
            cluster = page['cluster']
            liketime = [(user['created_time'] - epoch).total_seconds()
                        for user in users]
            done += 1
            print(done)
            for back, user in enumerate(users):
                # Entries up to `ahead` are no later than this user's
                # timestamp + dt; subtracting the user's own index leaves
                # the count from this user onward.
                ahead = bisect.bisect_right(liketime, liketime[back] + dt)
                score = ahead - back
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {'cluster.' + str(cluster): score}},
                    upsert=False)
        except Exception:
            # Best-effort per page, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` did.
            print("hmmm")
def getPartialInfluenceScore():
    """Rebuild the per-user, per-cluster partial influence scores.

    Drops the cluster influencer score collection and seeds it with one
    document per friend, holding an empty list under each key ``'0'`` ..
    ``str(clusterNumber - 1)``, where ``clusterNumber`` is the number of
    distinct ``cluster`` values in the pages cluster-info collection.

    Then, for every page whose ``count`` is greater than 3, the page's
    ``people`` are sorted by ``created_time``. Each person's score is the
    number of people at or after their position in that order (themselves
    included) whose ``created_time`` is at most seven days later. The score
    is pushed onto the person's list for the page's cluster; the update does
    not upsert, so people without a seeded document are left out.

    A running count of processed pages is printed. A page that raises an
    ordinary exception prints ``hmmm`` and is skipped.
    """
    # NOTE(review): a same-named definition precedes this one in the file;
    # this later definition is the one bound at import time.
    clusterInfoCollection = getPagesClusterInfoCollection()
    clusterInfluencerCollection = getClusterInfluencerScoreCollection()
    friendsCollection = getFriendsCollection()

    clusterInfluencerCollection.drop()
    # NOTE(review): seeding keys 0..n-1 assumes cluster labels are exactly
    # those integers -- confirm against the clustering step.
    clusterNumber = len(clusterInfoCollection.distinct('cluster'))
    for friend in friendsCollection.find():
        document = {
            '_id': friend['id'],
            'cluster': {str(i): [] for i in range(clusterNumber)},
        }
        clusterInfluencerCollection.insert(document)

    pagesCursor = clusterInfoCollection.find({"count": {"$gt": 3}})
    epoch = datetime.datetime.utcfromtimestamp(0)
    dt = 7 * 24 * 60 * 60  # look-ahead window: seven days, in seconds
    done = 0
    for page in pagesCursor:
        users = page['people']
        try:
            users.sort(key=lambda x: x['created_time'])
            cluster = page['cluster']
            liketime = [(user['created_time'] - epoch).total_seconds()
                        for user in users]
            done += 1
            print(done)
            for back, user in enumerate(users):
                # Entries up to `ahead` are no later than this user's
                # timestamp + dt; subtracting the user's own index leaves
                # the count from this user onward.
                ahead = bisect.bisect_right(liketime, liketime[back] + dt)
                score = ahead - back
                clusterInfluencerCollection.update(
                    {'_id': user['id']},
                    {'$push': {'cluster.' + str(cluster): score}},
                    upsert=False)
        except Exception:
            # Best-effort per page, but no longer swallows KeyboardInterrupt
            # or SystemExit the way a bare ``except:`` did.
            print("hmmm")
def getClusterContribution():
    """Store, per user, the share of their scores that falls in each cluster.

    Drops the user-cluster-contribution collection, then for every document
    in the cluster influencer score collection inserts a document with the
    same ``_id`` and a ``contribution`` list. Entry ``i`` of that list is the
    number of scores in the i-th cluster divided by the user's total number
    of scores. The list is empty when the user has no scores at all. The
    value returned by each insert is printed.
    """
    # NOTE(review): a same-named definition appears earlier in the file; this
    # later definition is the one bound at import time.

    def cluster_order(label):
        # Cluster keys are strings. Plain sorting puts '10' before '2', which
        # misaligns contribution[i] with cluster str(i); order numeric labels
        # by value and keep any non-numeric ones after them.
        try:
            return (0, int(label), label)
        except ValueError:
            return (1, 0, label)

    clusterInfluencerScoreCollection = getClusterInfluencerScoreCollection()
    userClusterContributionCollection = getUserClusterContributionCollection()
    userClusterContributionCollection.drop()

    for scores in clusterInfluencerScoreCollection.find():
        clusters = scores['cluster']
        counts = [len(clusters[label])
                  for label in sorted(clusters, key=cluster_order)]
        # Sum once instead of re-summing the list for every element.
        total = sum(counts)
        if total > 0:
            shares = [count * 1.0 / total for count in counts]
        else:
            shares = []

        document = {'_id': scores['_id'], 'contribution': shares}
        inserted = userClusterContributionCollection.insert(document)
        print('Calculated for %s' % (inserted,))