Esempio n. 1
0
class OutputQueue(object):
    """Push every processed item onto a ``MongoPageQueue``.

    The queue wraps a single MongoDB collection and is tagged with a job id.
    """

    def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
        """Open the queue collection and wrap it in a ``MongoPageQueue``.

        Args:
            mongo_host: Host argument handed to the MongoDB client.
            mongo_db: Name of the database that holds the queue collection.
            mongo_col: Name of the queue collection.
            jobid: Job identifier passed through to ``MongoPageQueue``.
        """
        # ``pymongo.Connection`` was deprecated and removed in PyMongo 3.0;
        # ``MongoClient`` is the supported replacement.
        client = pymongo.MongoClient(mongo_host)
        self.q = MongoPageQueue(client[mongo_db][mongo_col], jobid)

    @classmethod
    def from_crawler(cls, crawler):
        """Build an instance from ``crawler.settings``.

        Reads the ``MONGO_HOST``, ``MONGO_DB``, ``MONGO_QUEUE_COL`` and
        ``JOBID`` settings and forwards them to the constructor.
        """
        settings = crawler.settings
        return cls(
            settings['MONGO_HOST'],
            settings['MONGO_DB'],
            settings['MONGO_QUEUE_COL'],
            settings['JOBID'],
        )

    def process_item(self, item, spider):
        """Push a ``dict`` copy of ``item`` onto the queue and return ``item``.

        ``spider`` is accepted but not used here.
        """
        self.q.push(dict(item))
        return item
class OutputQueue(object):
    """Forward processed items to a MongoDB-backed ``MongoPageQueue``."""

    def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
        """Create the queue for the given collection and job id.

        Args:
            mongo_host: Host argument handed to the MongoDB client.
            mongo_db: Database name.
            mongo_col: Collection name inside ``mongo_db``.
            jobid: Job identifier passed to ``MongoPageQueue``.
        """
        # Use MongoClient: the legacy ``pymongo.Connection`` class no longer
        # exists in PyMongo 3.0 and later.
        database = pymongo.MongoClient(mongo_host)[mongo_db]
        self.q = MongoPageQueue(database[mongo_col], jobid)

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor that pulls its arguments from settings.

        The values come from ``crawler.settings`` under the keys
        ``MONGO_HOST``, ``MONGO_DB``, ``MONGO_QUEUE_COL`` and ``JOBID``.
        """
        keys = ('MONGO_HOST', 'MONGO_DB', 'MONGO_QUEUE_COL', 'JOBID')
        return cls(*[crawler.settings[key] for key in keys])

    def process_item(self, item, spider):
        """Enqueue ``dict(item)`` and hand the original ``item`` back.

        ``spider`` is part of the signature but is not read.
        """
        self.q.push(dict(item))
        return item
Esempio n. 3
0
 def setUp(self):
     """Drop the ``hci-test`` database and create a fresh page queue."""
     client = MongoClient()
     # Drop the database first so each test begins without existing data.
     client.drop_database('hci-test')
     pages = client['hci-test']['crawler.pages']
     self.pagequeue = MongoPageQueue(pages, 'JOBID')
Esempio n. 4
0
 def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
     """Open the queue collection and wrap it in a ``MongoPageQueue``.

     Args:
         mongo_host: Host argument handed to the MongoDB client.
         mongo_db: Name of the database that holds the queue collection.
         mongo_col: Name of the queue collection.
         jobid: Job identifier passed through to ``MongoPageQueue``.
     """
     # ``pymongo.Connection`` was deprecated and removed in PyMongo 3.0;
     # ``MongoClient`` is the supported replacement.
     client = pymongo.MongoClient(mongo_host)
     self.q = MongoPageQueue(client[mongo_db][mongo_col], jobid)
 def __init__(self, mongo_host, mongo_db, mongo_col, jobid):
     """Create the queue for the given collection and job id.

     Args:
         mongo_host: Host argument handed to the MongoDB client.
         mongo_db: Database name.
         mongo_col: Collection name inside ``mongo_db``.
         jobid: Job identifier passed to ``MongoPageQueue``.
     """
     # Use MongoClient: the legacy ``pymongo.Connection`` class no longer
     # exists in PyMongo 3.0 and later.
     database = pymongo.MongoClient(mongo_host)[mongo_db]
     self.q = MongoPageQueue(database[mongo_col], jobid)
Esempio n. 6
0
 def setUp(self):
     """Drop the ``hci-test`` database and create a fresh page queue."""
     # ``pymongo.Connection`` was removed in PyMongo 3.0; ``MongoClient`` is
     # the supported way to connect (default host and port, as before).
     client = pymongo.MongoClient()
     # Drop the database first so each test begins without existing data.
     client.drop_database('hci-test')
     pages = client['hci-test']['crawler.pages']
     self.pagequeue = MongoPageQueue(pages, 'JOBID')