# Collect every page a friend has liked into the page collection (one
# document per unique page id).
from database import getLikesCollection, getPageCollection


def getPages():
    likesCollection = getLikesCollection()
    pageCollection = getPageCollection()
    counter = 0
    for likes in likesCollection.find():
        likes = likes['data']
        for like in likes:
            page = {'_id': like['id']}
            # Upsert so each page id is stored only once.
            pageCollection.replace_one(page, page, upsert=True)
            counter += 1
            print(counter)
    print('Total', counter, 'pages fetched')
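
Every snippet here takes its MongoDB collection handles from a local database
module that is not included in the source. The following is only a minimal
sketch of what that module presumably looks like; the connection URI and all
database and collection names are assumptions made for illustration.

# database.py -- sketch only, not the original module.
from pymongo import MongoClient

_client = MongoClient('mongodb://localhost:27017')
_db = _client['facebook']


def getFriendsCollection():
    return _db['friends']


def getLikesCollection():
    return _db['likes']


def getPageCollection():
    return _db['pages']


def getPagesClusterInfoCollection():
    return _db['pages_cluster_info']


def getClusterCollection():
    return _db['clusters']
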
# For every page, collect who liked it, when they liked it (if known), and the
# cluster the page was assigned to, then store one summary document per page.
from database import getPageCollection, getLikesCollection, getPagesClusterInfoCollection, getClusterCollection
import dateutil.parser as dateparser

allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

# Rebuild the summary collection from scratch.
fbpagesinfo.drop()
counter = 0
for pageId in allpages.find():
    query = {'data': {'$elemMatch': {'id': pageId['_id']}}}
    cursor = alllikes.find(query)
    cluster = clusterinfo.find_one({'pages': pageId['_id']})
    cluster = cluster['cluster']

    document = {
        '_id': pageId['_id'],
        'people': [],
        'count': alllikes.count_documents(query),
        'cluster': cluster
    }
    for c in cursor:
        dd = {'id': c['id']}
        for pages in c['data']:
            if pages['id'] == pageId['_id']:
                if 'created_time' in pages:
                    dd['created_time'] = dateparser.parse(pages['created_time'])
                # Stop scanning this person's likes once the page is found.
                break
        document['people'].append(dd)
    counter += 1
    print('document', counter, 'done')
    fbpagesinfo.insert_one(document)
Example #3
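This example also imports a base url and an access_token from a utilities
module that is not shown. A plausible sketch, where the base URL points at the
Facebook Graph API and the token value is only a placeholder:

# utilities.py -- sketch only, not the original module.
url = 'https://graph.facebook.com'
access_token = 'YOUR_FACEBOOK_ACCESS_TOKEN'
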
# Fetch each friend's likes from the Graph API using a pool of worker threads
# that pull friend ids from a shared queue.
import requests
from database import getLikesCollection, getFriendsCollection
from utilities import url, access_token
from queue import Queue
import threading

idQueue = Queue()

likesCollection = getLikesCollection()
friendsCollection = getFriendsCollection()

# Queue the id of every friend whose likes should be fetched.
for friend in friendsCollection.find():
    idQueue.put(friend['id'])


class getLikes(threading.Thread):
    def __init__(self):
        threading.Thread.__init__(self)
        self.queue = idQueue

    def run(self):
        while True:
            try:
                fbid = self.queue.get()
                rurl = url + '/v2.3/' + fbid
                response = requests.get(rurl,
                                        params={
                                            'access_token': access_token,
                                            'fields': 'likes'
                                        })
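                # The original example is truncated here; the rest of run() below
                # is a hedged sketch. It assumes the Graph API response carries the
                # friend's likes under 'likes' -> 'data' and stores them keyed by
                # the friend's id, matching the documents read by the other snippets.
                data = response.json()
                if 'likes' in data:
                    likesCollection.insert_one(
                        {'id': fbid, 'data': data['likes']['data']})
            except Exception as exc:
                print('failed to fetch likes for', fbid, exc)
            finally:
                self.queue.task_done()


# Driving the workers is likewise a sketch: spawn a few daemon threads and
# block until every queued id has been processed. The thread count is arbitrary.
for _ in range(4):
    worker = getLikes()
    worker.daemon = True
    worker.start()

idQueue.join()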