def clusterLevelResults():
    """Rebuild the per-cluster ranking of users by influence score.

    Reads every document from the full influence score collection. Each
    document carries an '_id', a 'name' and a 'score' sequence; entry ``i``
    of 'score' is recorded under the result document whose '_id' is
    ``str(i)``. The result collection is dropped and recreated with one
    document per cluster, each holding a 'users' array kept sorted by
    descending score and capped at the 1000 highest-scoring entries.

    Returns None. If the score collection is empty the function returns
    without touching the existing results.
    """
    scoreCollection = getFullInfluenceScoreCollection()
    clusterResCollection = getclusterLevelResultCollection()

    # The number of clusters is taken from the length of one document's
    # score list.
    sample = scoreCollection.find_one()
    if sample is None:
        # Nothing to rank; keep whatever results are already stored.
        return
    numCluster = len(sample['score'])

    # Start from a clean slate: one empty bucket per cluster.
    clusterResCollection.drop()
    for clusterIndex in range(numCluster):
        clusterResCollection.insert({'_id': str(clusterIndex), 'users': []})

    for scores in scoreCollection.find():
        fid = scores['_id']
        name = scores['name']
        for i, score in enumerate(scores['score']):
            doc = {'id': fid, 'name': name, 'score': score}
            print('%s %s' % (i, doc))
            # Sort descending and keep the FIRST 1000 entries. A negative
            # $slice would keep the tail of the descending array, i.e. the
            # lowest scores, once a cluster grows past the cap.
            clusterResCollection.update(
                {'_id': str(i)},
                {
                    '$push': {
                        'users': {
                            '$each': [doc],
                            '$sort': {'score': -1},
                            '$slice': 1000,
                        }
                    }
                },
                upsert=False)
def clusterLevelResults():
    """Rebuild the per-cluster ranking of users by influence score.

    Reads every document from the full influence score collection. Each
    document carries an '_id', a 'name' and a 'score' sequence; entry ``i``
    of 'score' is recorded under the result document whose '_id' is
    ``str(i)``. The result collection is dropped and recreated with one
    document per cluster, each holding a 'users' array kept sorted by
    descending score and capped at the 1000 highest-scoring entries.

    Returns None. If the score collection is empty the function returns
    without touching the existing results.
    """
    scoreCollection = getFullInfluenceScoreCollection()
    clusterResCollection = getclusterLevelResultCollection()

    # The number of clusters is taken from the length of one document's
    # score list.
    sample = scoreCollection.find_one()
    if sample is None:
        # Nothing to rank; keep whatever results are already stored.
        return
    numCluster = len(sample['score'])

    # Start from a clean slate: one empty bucket per cluster.
    clusterResCollection.drop()
    for clusterIndex in range(numCluster):
        clusterResCollection.insert({'_id': str(clusterIndex), 'users': []})

    for scores in scoreCollection.find():
        fid = scores['_id']
        name = scores['name']
        for i, score in enumerate(scores['score']):
            doc = {'id': fid, 'name': name, 'score': score}
            print('%s %s' % (i, doc))
            # Sort descending and keep the FIRST 1000 entries. A negative
            # $slice would keep the tail of the descending array, i.e. the
            # lowest scores, once a cluster grows past the cap.
            clusterResCollection.update(
                {'_id': str(i)},
                {
                    '$push': {
                        'users': {
                            '$each': [doc],
                            '$sort': {'score': -1},
                            '$slice': 1000,
                        }
                    }
                },
                upsert=False)
# Find the index of the cluster that contains the page identified by PID.
# Each cluster is an iterable of indices into args; cleanName() normalises
# an entry before it is compared with PID.
cluster = None
for j, c in enumerate(clusters):
    for i in c:
        if cleanName(args[i]) == PID:
            cluster = j
            break

# Write up to five entries from the front of that cluster's stored 'users'
# array to tempfile.txt.
clusterCollection = getclusterLevelResultCollection()
with open('tempfile.txt', 'w') as f:
    f.write("Influencers for this cluster are\n")
    if cluster is not None:
        # Result documents are keyed by the cluster index as a string.
        influ = clusterCollection.find_one({'_id': str(cluster)})
        users = influ['users'] if influ is not None else []
        for each in users[:5]:
            # write() accepts exactly one string, so format the whole line
            # first.
            # NOTE(review): under Python 2 a non-ASCII name would need
            # explicit encoding before being written -- confirm.
            f.write('%s %s\n' % (each['name'], each['id']))
print('done')
# Find the index of the cluster that contains the page identified by PID.
# Each cluster is an iterable of indices into args; cleanName() normalises
# an entry before it is compared with PID.
cluster = None
for j, c in enumerate(clusters):
    for i in c:
        if cleanName(args[i]) == PID:
            cluster = j
            break

# Write up to five entries from the front of that cluster's stored 'users'
# array to tempfile.txt.
clusterCollection = getclusterLevelResultCollection()
with open('tempfile.txt', 'w') as f:
    f.write("Influencers for this cluster are\n")
    if cluster is not None:
        # Result documents are keyed by the cluster index as a string.
        influ = clusterCollection.find_one({'_id': str(cluster)})
        users = influ['users'] if influ is not None else []
        for each in users[:5]:
            # write() accepts exactly one string, so format the whole line
            # first.
            # NOTE(review): under Python 2 a non-ASCII name would need
            # explicit encoding before being written -- confirm.
            f.write('%s %s\n' % (each['name'], each['id']))
print('done')