import logging

from scrapy.utils.project import get_project_settings
from twisted.internet import defer
from txmongo import MongoConnectionPool
from txmongo import filter as qf

settings = get_project_settings()
logger = logging.getLogger(__name__)


class AsyncMongoDBPipeline(object):

    def __init__(self):
        host = settings['MONGODB_SERVER']
        port = settings['MONGODB_PORT']
        db = settings['MONGODB_EMAIL_DB']
        collection = settings['MONGODB_EMAIL_COLLECTION']
        self.connection = MongoConnectionPool(host=host, port=port)
        logger.info("Async MongoDB connection established...")
        self.db = self.connection[db]
        self.collection = self.db[collection]

    def evaluateIndex(self, indxs):
        # Only create the unique email index if it does not already exist.
        if ('EmailIndex' in indxs) and indxs['EmailIndex']['unique']:
            return
        logger.info("Creating email index...")
        self.collection.create_index(
            qf.sort(qf.ASCENDING('Email')),
            name="EmailIndex", unique=True)

    @defer.inlineCallbacks
    def process_item(self, item, spider):
        try:
            yield self.collection.insert(dict(item))
        except Exception:
            logger.warning("Caught data exception in pipeline for %s",
                           item["PageUrl"])
        # Return the item so later pipelines keep processing it.
        defer.returnValue(item)

    def close_spider(self, spider):
        spider.log("Spider %s closed, closing mongodb connection" % spider.name,
                   level=logging.INFO)
        self.connection.disconnect()

    def printError(self, err):
        logger.debug(err.getErrorMessage())
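
# For context, a minimal sketch of the Scrapy settings this pipeline expects.
# The pipeline module path and the concrete values below are assumptions for
# illustration, not taken from the original project.
ITEM_PIPELINES = {
    "myproject.pipelines.AsyncMongoDBPipeline": 300,
}
MONGODB_SERVER = "127.0.0.1"
MONGODB_PORT = 27017
MONGODB_EMAIL_DB = "emails"
MONGODB_EMAIL_COLLECTION = "addresses"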
    @defer.inlineCallbacks
    def pool(self):
        # Lazily create the connection pool once and cache it on the instance.
        if not getattr(self, '_pool', None):
            mongo = yield MongoConnectionPool(
                host=self.settings.get('oauth_mongo_host', '127.0.0.1'),
                port=self.settings.get('oauth_mongo_port', 27017),
                reconnect=self.settings.get('oauth_mongo_reconnect', True),
                pool_size=self.settings.get('oauth_mongo_pool_size', 5))
            self._pool = mongo
        defer.returnValue(self._pool)
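
    # Rough usage sketch: a caller resolves the cached pool inside another
    # inlineCallbacks method before issuing a query. The database, collection,
    # and method names below are assumptions for illustration only.
    @defer.inlineCallbacks
    def find_access_token(self, token_id):
        pool = yield self.pool()  # reuses (or lazily creates) the shared pool
        token = yield pool.oauth.access_tokens.find_one({"_id": token_id})
        defer.returnValue(token)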
    def __init__(self, hostname="127.0.0.1", port=27017):
        # Connect to MongoDB and use the benchmark.results collection.
        connection = MongoConnectionPool(host=hostname, port=port)
        self.collection = connection.benchmark.results
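
# Rough usage sketch (assumed names): write one timing document asynchronously,
# where 'store' is an instance of the benchmark class above and insert() returns
# a Deferred.
from twisted.internet import defer

@defer.inlineCallbacks
def save_result(store, test_name, elapsed_ms):
    yield store.collection.insert({"test": test_name, "elapsed_ms": elapsed_ms})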