Python getPageCollection Examples

Programming Language: Python

Namespace/Package Name: database

Method/Function: getPageCollection

Examples at hotexamples.com: 5

Python getPageCollection - 5 examples found. These are the top-rated real-world Python examples of database.getPageCollection extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: getPages.py Project: wantengfeng/Finding-Influencers-in-Social-Networks

def getPages():
    likesCollection = getLikesCollection()
    pageCollection = getPageCollection()
    counter = 0
    for likes in likesCollection.find():
        likes = likes['data']
        for like in likes:
            page = {'_id': like['id']}
            pageCollection.update(page, page, upsert=True)
            counter += 1
            print counter
    print 'Total', counter, 'pages fetched'

Example #2

Show file

File: getPageData2.py Project: jiangxilong/Finding-Influencers-in-Social-Networks

def getPageData():
	pageCollection = getPageCollection()
	pageDataCollection = getPageDataCollection()

	queue = Queue()
	index = 1
	for page_id in pageCollection.find():
		if not pageDataCollection.find_one(page_id):
			queue.put(page_id['_id'])
			index += 1

	print index

	for i in range(200):
		t = fetchingPageData(queue)
		t.setDaemon(True)
		t.start()

	queue.join()

	for doc in pageDataCollection.find():
		f = open('data/' + doc['_id'] + '.txt','w')
		f.write(doc['data'])
		f.close()

Example #3

Show file

def getPageData():
    pageCollection = getPageCollection()
    pageDataCollection = getPageDataCollection()

    queue = Queue()
    index = 1
    for page_id in pageCollection.find():
        if not pageDataCollection.find_one(page_id):
            queue.put(page_id['_id'])
            index += 1

    print index

    for i in range(200):
        t = fetchingPageData(queue)
        t.setDaemon(True)
        t.start()

    queue.join()

    for doc in pageDataCollection.find():
        f = open('data/' + doc['_id'] + '.txt', 'w')
        f.write(doc['data'])
        f.close()

Example #4

Show file

File: pageAnalytics.py Project: ronilp/Finding-Influencers-in-Social-Networks

from database import getPageCollection, getLikesCollection, getPagesClusterInfoCollection, getClusterCollection
from pprint import pprint
import dateutil.parser as dateparser

allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

fbpagesinfo.drop()
counter = 0
for pageId in allpages.find():
    cursor = alllikes.find({'data': {'$elemMatch': {'id': pageId['_id']}}})
    cluster = clusterinfo.find_one({'pages': pageId['_id']})
    cluster = cluster["cluster"]

    document = {'_id': pageId['_id'], 'people': [], 'count': cursor.count(), 'cluster': cluster}
    for c in cursor:
        dd = {'id': c['id']}
        for pages in c['data']:
            if pages['id'] == pageId['_id']:
                if 'created_time' in pages:
                    dd['created_time'] = dateparser.parse(pages['created_time'])
                    break
        document['people'].append(dd)
    counter += 1
    print 'document', counter, 'done'
    fbpagesinfo.insert(document)

Example #5

Show file

from database import getPageCollection, getLikesCollection, getPagesClusterInfoCollection, getClusterCollection
from pprint import pprint
import dateutil.parser as dateparser

# Collections used by this script, obtained from the project's `database`
# module helpers.
allpages = getPageCollection()
alllikes = getLikesCollection()
fbpagesinfo = getPagesClusterInfoCollection()
clusterinfo = getClusterCollection()

# Empty the per-page cluster info collection before processing.
fbpagesinfo.drop()
counter = 0
for pageId in allpages.find():
    # Likes documents whose 'data' array contains an entry for this page id.
    cursor = alllikes.find({'data': {'$elemMatch': {'id': pageId['_id']}}})
    cluster = clusterinfo.find_one({'pages': pageId['_id']})
    # NOTE(review): find_one() returns None when nothing matches, in which
    # case this subscript raises TypeError -- confirm every page has a cluster.
    cluster = cluster["cluster"]

    # Summary record for this page: matching-likes count, its cluster, and a
    # 'people' list filled in by the loop below.
    document = {
        '_id': pageId['_id'],
        'people': [],
        'count': cursor.count(),
        'cluster': cluster
    }
    for c in cursor:
        # One entry per matching likes document, keyed by that document's 'id'.
        dd = {'id': c['id']}
        for pages in c['data']:
            if pages['id'] == pageId['_id']:
                # The break only runs when the matching entry has a
                # 'created_time'; otherwise the scan continues.
                if 'created_time' in pages:
                    dd['created_time'] = dateparser.parse(
                        pages['created_time'])
                    break
        document['people'].append(dd)
# NOTE(review): the visible snippet ends here; in the lines shown, `document`
# is never written to `fbpagesinfo` and `counter` is never incremented --
# check against the full file.