Example #1
0
    def setUp(self):
        """Start the mongod processes and wait until the replica set is usable.

        One ``Mongod`` is started per entry in ``self.ports``; the first node
        receives ``replSetInitiate`` with ``self.rsconfig``. The node is then
        polled every ``__ping_interval`` seconds, for at most
        ``__init_timeout`` seconds, until it reports itself as master and all
        members are in the PRIMARY or SECONDARY state.

        Raises:
            Exception: if the replica set is not ready within the timeout.
                ``tearDown`` is called before raising.
        """
        self.__mongod = [Mongod(port=p, replset=self.rsname) for p in self.ports]
        yield defer.gatherResults([mongo.start() for mongo in self.__mongod])

        master_uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[0])
        master = ConnectionPool(master_uri)

        ready = False
        try:
            yield master.admin.command("replSetInitiate", self.rsconfig)

            ok_states = set(["PRIMARY", "SECONDARY"])
            n_tries = int(self.__init_timeout / self.__ping_interval)
            # range() works on both Python 2 and 3; the try count is small.
            for _ in range(n_tries):
                yield self.__sleep(self.__ping_interval)

                # My practice shows that we need to query both ismaster and replSetGetStatus
                # to be sure that replica set is up and running, primary is elected and all
                # secondaries are in sync and ready to become new primary

                ismaster_req = master.admin.command("ismaster", check=False)
                replstatus_req = master.admin.command("replSetGetStatus", check=False)
                ismaster, replstatus = yield defer.gatherResults([ismaster_req, replstatus_req])

                initialized = replstatus["ok"]
                states_ready = all(m["stateStr"] in ok_states for m in replstatus.get("members", []))
                ready = initialized and ismaster["ismaster"] and states_ready

                if ready:
                    break
        finally:
            # Release the helper pool on every path (success, timeout, or a
            # failing command) so it does not outlive setUp.
            yield master.disconnect()

        if not ready:
            yield self.tearDown()
            raise Exception("ReplicaSet initialization took more than {0}s".format(self.__init_timeout))
Example #2
0
 def __init__(self, uri=None):
     """Create the connection pool and bind ``Youdao.Words`` to ``self.db``.

     :param uri: optional MongoDB URI; when falsy, ``ConnectionPool`` is
         created with its default arguments.
     """
     pool = ConnectionPool(uri=uri) if uri else ConnectionPool()
     self.db = pool.Youdao.Words
     # Counters initialised here; how they are used is not visible in this block.
     self.MAX_CONNECTIONS = 1024
     self.connections = 0
    def open_spider(self, spider: Spider):
        """Open the MongoDB connection and prepare the target collection.

        The database and collection names are read from ``self.settings``
        under ``MONGODB_DATABASE`` and ``MONGODB_COLLECTION``. After the
        collection is resolved, ``self.create_index(spider)`` is awaited.

        :param spider: the spider being opened; passed on to ``create_index``.
        """
        self.cnx = ConnectionPool(self.uri, codec_options=self.codec_options)
        self.db = getattr(self.cnx, self.settings[MONGODB_DATABASE])
        collection = getattr(self.db, self.settings[MONGODB_COLLECTION])
        # with_options() returns a new collection object rather than modifying
        # the receiver, so its result must be kept for the options to apply.
        self.coll = collection.with_options(codec_options=self.codec_options)

        yield self.create_index(spider)
        logger.info('Spider opened: Open the connection to MongoDB: %s',
                    self.uri)
Example #4
0
    def test_AutoReconnect_from_primary_step_down(self):
        """Force the primary to step down and expect AutoReconnect on the client."""
        member_count = len(self.ports)
        pool = ConnectionPool(
            "mongodb://localhost:{0}/?w={1}".format(self.ports[0], member_count),
            max_delay=5)

        # Forcing the primary to step down triggers an AutoReconnect that
        # bubbles up through the connection pool to the client.
        step_down = SON([('replSetStepDown', 86400), ('force', 1)])
        pending = pool.admin.command(step_down)
        # NOTE(review): the Deferred returned by assertFailure is not yielded,
        # so the test does not wait for it here -- confirm this is intentional.
        self.assertFailure(pending, AutoReconnect)

        yield pool.disconnect()
Example #5
0
    def test_SlaveOk(self):
        """Reads with QUERY_SLAVE_OK succeed on the second node; inserts fail."""
        pool = ConnectionPool(
            "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[1]))
        try:
            found = yield pool.db.coll.find(flags=QUERY_SLAVE_OK)
            self.assertEqual(found, [])

            rejected_insert = pool.db.coll.insert({'x': 42})
            yield self.assertFailure(rejected_insert, OperationFailure)
        finally:
            # Close the pool whether or not the assertions passed.
            yield pool.disconnect()
Example #6
0
    def test_ConnectionUrlParams(self):
        """``w`` and ``j`` given in the URI must reach the mocked getLastError call."""
        uri = "mongodb://{0}:{1}/?w=2&j=true".format(mongo_host, mongo_port)
        pool = ConnectionPool(uri)
        target = pool.mydb.mycol

        try:
            with self.mock_gle() as gle:
                yield target.insert({'x': 42})
                gle.assert_called_once_with('mydb', w=2, j=True)
        finally:
            # Clean up the collection and the pool regardless of the outcome.
            yield target.drop()
            yield pool.disconnect()
Example #7
0
    def test_AutoReconnect_from_primary_step_down(self):
        """Force the primary to step down and expect AutoReconnect on the client."""
        # Patch the class attribute for the duration of this test.
        self.patch(_Connection, 'maxDelay', 5)
        primary_port, member_count = self.ports[0], len(self.ports)
        pool = ConnectionPool(
            "mongodb://localhost:{0}/?w={1}".format(primary_port, member_count))

        # Forcing the primary to step down triggers an AutoReconnect that
        # bubbles up through the connection pool to the client.
        step_down = SON([('replSetStepDown', 86400), ('force', 1)])
        pending = pool.admin.command(step_down)
        # NOTE(review): the Deferred returned by assertFailure is not yielded,
        # so the test does not wait for it here -- confirm this is intentional.
        self.assertFailure(pending, AutoReconnect)

        yield pool.disconnect()
Example #8
0
    def test_ConnectionUrlParams(self):
        """Write-concern options from the URI are forwarded to getLastError."""
        template = "mongodb://{0}:{1}/?w=2&j=true"
        pool = ConnectionPool(template.format(mongo_host, mongo_port))
        collection = pool.mydb.mycol

        try:
            with self.mock_gle() as gle_mock:
                yield collection.insert({'x': 42})
                gle_mock.assert_called_once_with('mydb', w=2, j=True)
        finally:
            # Always drop the test collection and close the pool.
            yield collection.drop()
            yield pool.disconnect()
Example #9
0
    def test_SlaveOk(self):
        """Reads with QUERY_SLAVE_OK succeed on the second node; inserts fail.

        The expected insert error depends on the server version reported by
        ``serverStatus``: ``AutoReconnect`` from 4.2 onwards, otherwise
        ``OperationFailure``.
        """
        uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[1])
        conn = ConnectionPool(uri)
        try:
            empty = yield conn.db.coll.find(flags=QUERY_SLAVE_OK)
            self.assertEqual(empty, [])

            server_status = yield conn.admin.command("serverStatus")
            # Only major.minor decide the expected error. Parsing just those
            # two parts avoids int() failing on a suffixed patch component
            # such as "0-rc1", and makes the 4.2 threshold explicit instead of
            # relying on list-length comparison.
            major, minor = [int(part) for part in server_status["version"].split('.')[:2]]

            expected_error = AutoReconnect if (major, minor) >= (4, 2) else OperationFailure
            yield self.assertFailure(conn.db.coll.insert({'x': 42}), expected_error)
        finally:
            yield conn.disconnect()
Example #10
0
    def __init__(self, config):
        """Opens a MongoDB connection pool

        :param config: a ``(mongo_url, mongo_db, mongo_collection)`` triple.
            A URL using the ``mongo:`` scheme is accepted and rewritten to
            ``mongodb:``.
        """

        # Report connection error only once
        self.report_connection_error = True

        mongo_url, mongo_db, mongo_collection = config
        # Rewrite only a leading "mongo:" scheme. A substring replace would
        # also corrupt other parts of the URL, e.g. a host named "mongo" in
        # "mongodb://mongo:27017" would become "mongodb".
        if mongo_url.startswith('mongo:'):
            mongo_url = 'mongodb:' + mongo_url[len('mongo:'):]
        # Setup MongoDB Connection
        self.mongo_url = mongo_url
        self.connection = ConnectionPool(mongo_url, connect_timeout=5)
        self.mongo_db = self.connection[mongo_db]
        self.collection = self.mongo_db[mongo_collection]
Example #11
0
    def test_InvalidRSName(self):
        """Connecting with a wrong replica set name must fail with ConfigurationError."""
        wrong_rsname_uri = "mongodb://localhost:{0}/?replicaSet={1}_X".format(
            self.ports[0], self.rsname)

        config_error_seen = defer.Deferred()

        def fake_fail(proto, exception):
            # Replacement for MongoProtocol.fail: stop the pool, then report
            # whether the failure was exactly a ConfigurationError.
            pool.disconnect()

            if type(exception) != ConfigurationError:
                config_error_seen.errback(exception)
            else:
                config_error_seen.callback(None)

        self.patch(MongoProtocol, "fail", fake_fail)

        # Created after patching; fake_fail refers to it through the closure.
        pool = ConnectionPool(wrong_rsname_uri)

        @defer.inlineCallbacks
        def insert_must_not_succeed():
            yield pool.db.coll.insert({'x': 42})
            raise Exception("You shall not pass!")

        # Finish as soon as either Deferred fires, successfully or not.
        yield defer.DeferredList([config_error_seen, insert_must_not_succeed()],
                                 fireOnOneCallback=True,
                                 fireOnOneErrback=True)
        self.flushLoggedErrors(AutoReconnect)
Example #12
0
 def open_spider(self, spider):
     """Open the pool and pick the item processor and collection by spider name.

     Unknown spider names get a processor that accepts anything and does
     nothing.
     """
     self.m = ConnectionPool(self.settings.get("MONGO"))
     name = spider.name
     if name == "jetsetter":
         self.db = self.m.scripture.jsets
         self._processor = self.process_jset_item
     elif name == "ustravelzoo":
         self._processor = self.process_tzoo_item
         self.db = self.m.scripture.tzoos
     elif name == "hcom":
         self._processor = self.process_hotels_item
         self.db = self.m.scripture.hotels
         self.rooms_zh_db = self.m.scripture.hcom.zh.rooms
         self.rooms_en_db = self.m.scripture.hcom.en.rooms
     elif name == "eventbrite":
         self._processor = self.process_eventbrite_item
         self.db = self.m.scripture.eventbrites
     elif name == "booking":
         self._processor = self.process_booking_item
         self.db = self.m.scripture.bookings
     elif name == "distributed_spider":
         self._processor = self.distributed_spider
         # NOTE(review): redis_url and db are not defined in this block --
         # presumably module-level names; confirm.
         self.rds = redis.StrictRedis(host=redis_url.host,
                                      port=redis_url.port,
                                      db=db,
                                      password=redis_url.password)
     else:
         self._processor = lambda *args, **kwargs: None
Example #13
0
 def __init__(self, crawler):
     """Create the pool and select the database named in the crawler settings.

     Reads ``MONGODB_URI`` (default ``mongodb://localhost``) and
     ``MONGODB_DB`` (default ``scrapy_mongo_pipeline``).
     """
     self.crawler = crawler
     settings = self.crawler.settings
     uri = settings.get('MONGODB_URI', 'mongodb://localhost')
     database_name = settings.get('MONGODB_DB', 'scrapy_mongo_pipeline')
     self.ctx = ConnectionPool(uri)
     self.db = self.ctx[database_name]
 def open_spider(self, spider):
     """Connect, resolve the collection, and create the configured indexes."""
     self._db_client = yield ConnectionPool(self.db_uri)
     database = self._db_client[self.db_name]
     self._db = database
     self._coll = database[self.coll_name]
     # One query up front; presumably a reachability check before indexing.
     yield self._coll.find_one(timeout=True)
     for index_spec in self.db_index:
         index_filter = qf.sort(index_spec)
         yield self._coll.create_index(index_filter)
     storage_name = self.__class__.__name__
     logger.info('{storage} opened'.format(storage=storage_name))
 def onConnect(self):
     """Open the MongoDB pool, join the configured realm and derive ``_id``.

     ``_id`` is the hex MD5 digest of ``self.title``.
     """
     self.url = 'mongodb://localhost:27017'
     self.mongo = yield ConnectionPool(self.url)
     self.join(self.config.realm)
     self.title = "Test Data"
     # hashlib accepts only bytes on Python 3, so encode the title first.
     # For this ASCII title the digest is the same on Python 2.
     self._id = hashlib.md5(self.title.encode('utf-8')).hexdigest()
Example #16
0
    def setUp(self):
        """Start the mongod processes and wait until the replica set is usable.

        One ``Mongod`` is started per entry in ``self.ports`` and each port is
        checked with ``__check_reachable``. The first node then receives
        ``replSetInitiate`` with ``self.rsconfig`` and is polled every
        ``__ping_interval`` seconds, for at most ``__init_timeout`` seconds,
        until it reports itself as master and all members are in the PRIMARY
        or SECONDARY state.

        Raises:
            Exception: if the replica set is not ready within the timeout.
                ``tearDown`` is called before raising.
        """
        self.__mongod = [
            Mongod(port=p, replset=self.rsname) for p in self.ports
        ]
        yield defer.gatherResults([mongo.start() for mongo in self.__mongod])

        yield defer.gatherResults(
            [self.__check_reachable(port) for port in self.ports])

        master_uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(
            self.ports[0])
        master = ConnectionPool(master_uri)

        ready = False
        try:
            yield master.admin.command("replSetInitiate", self.rsconfig)

            ok_states = {"PRIMARY", "SECONDARY"}
            n_tries = int(self.__init_timeout / self.__ping_interval)
            for _ in range(n_tries):
                yield self.__sleep(self.__ping_interval)

                # My practice shows that we need to query both ismaster and replSetGetStatus
                # to be sure that replica set is up and running, primary is elected and all
                # secondaries are in sync and ready to become new primary

                ismaster_req = master.admin.command("ismaster", check=False)
                replstatus_req = master.admin.command("replSetGetStatus",
                                                      check=False)
                ismaster, replstatus = yield defer.gatherResults(
                    [ismaster_req, replstatus_req])

                initialized = replstatus["ok"]
                states_ready = all(m["stateStr"] in ok_states
                                   for m in replstatus.get("members", []))
                ready = initialized and ismaster["ismaster"] and states_ready

                if ready:
                    break
        finally:
            # Release the helper pool on every path (success, timeout, or a
            # failing command) so it does not outlive setUp.
            yield master.disconnect()

        if not ready:
            yield self.tearDown()
            raise Exception(
                "ReplicaSet initialization took more than {0}s".format(
                    self.__init_timeout))
 def open_spider(self, spider):
     """Connect to MongoDB and ensure the ``request_url`` index exists.

     Asynchronous (txmongo) counterpart of opening a ``pymongo.MongoClient``
     and calling ``create_index('request_url')``. The connection URI,
     database and collection names come from ``self.settings``.
     """
     self.client = yield ConnectionPool(self.settings['MONGODB_URI'])
     self.db = self.client[self.settings['MONGODB_DB']]
     self.coll = self.db[self.settings['MONGODB_COLL_RAW']]
     # Wait for the index creation so failures surface here instead of being
     # lost in a Deferred nobody holds.
     yield self.coll.create_index(sort([('request_url', 1)]))
def loadHomeZones():
    """Load ``GsmSimulatedData.PeopleHomeZones`` into an ``{id: zone}`` dict.

    Stores the opened pool in the module-level ``client`` and hands the
    mapping back through ``returnValue``.
    """
    global client
    client = yield ConnectionPool(url)
    home_zones = client.GsmSimulatedData.PeopleHomeZones
    print("\033[92mLoading Home Zone data.....\033[0m")
    rows = yield home_zones.find()
    returnValue({row['id']: row['zone'] for row in rows})
Example #19
0
    def open_spider(self, spider: Spider):
        """Open the MongoDB connection and resolve the seeds collection.

        Database and collection names are read from ``self.settings`` under
        ``SEEDS_MONGODB_DATABASE`` and ``SEEDS_MONGODB_COLLECTION``; both
        default to ``'seeds'``.

        :param spider: the spider being opened (not used in this method).
        """
        self.cnx = yield ConnectionPool(self.uri,
                                        codec_options=self.codec_options)
        # Attribute lookups return the database/collection objects directly,
        # so there is nothing to wait for here.
        self.db = getattr(
            self.cnx, self.settings.get(SEEDS_MONGODB_DATABASE, 'seeds'))
        collection = getattr(
            self.db, self.settings.get(SEEDS_MONGODB_COLLECTION, 'seeds'))

        # with_options() returns a new collection object rather than modifying
        # the receiver, so its result must be kept for the options to apply.
        self.coll = collection.with_options(codec_options=self.codec_options)

        logger.info('Spider opened: Open the connection to MongoDB: %s',
                    self.uri)
Example #20
0
    def test_AutoReconnect(self):
        """A read keeps being retried after the first mongod is suspended.

        The first mongod is stopped with SIGSTOP and ``find_one`` is retried
        on every ``AutoReconnect`` until it returns the inserted document.
        """
        # Create the pool before entering ``try`` so the ``finally`` clause
        # never touches an unbound ``conn`` if construction fails.
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, max_delay=5)

        try:
            yield conn.db.coll.insert({'x': 42}, safe=True)

            self.__mongod[0].kill(signal.SIGSTOP)

            while True:
                try:
                    result = yield conn.db.coll.find_one()
                    self.assertEqual(result['x'], 42)
                    break
                except AutoReconnect:
                    pass

        finally:
            # Resume the suspended process before cleaning up the pool.
            self.__mongod[0].kill(signal.SIGCONT)
            yield conn.disconnect()
            self.flushLoggedErrors(AutoReconnect)
Example #21
0
    def test_AutoReconnect(self):
        """A read keeps being retried after the first mongod is stopped.

        ``find_one`` is retried on every ``AutoReconnect`` until it returns
        the inserted document.
        """
        self.patch(_Connection, 'maxDelay', 5)

        # Create the pool before entering ``try`` so the ``finally`` clause
        # never touches an unbound ``conn`` if construction fails.
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri)

        try:
            yield conn.db.coll.insert({'x': 42}, safe=True)

            yield self.__mongod[0].stop()

            while True:
                try:
                    result = yield conn.db.coll.find_one()
                    self.assertEqual(result['x'], 42)
                    break
                except AutoReconnect:
                    pass

        finally:
            yield conn.disconnect()
            self.flushLoggedErrors(AutoReconnect)
Example #22
0
    def test_TimeExceeded_insert(self):
        """An insert with ``timeout=2`` must raise TimeExceeded while mongod is suspended.

        ``AutoReconnect`` errors are ignored and the insert retried; the test
        fails if an insert completes without raising.
        """
        # Create the pool before entering ``try`` so the ``finally`` clause
        # never touches an unbound ``conn`` if construction fails.
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, retry_delay=3, max_delay=5)

        try:
            yield conn.db.coll.insert({'x': 42}, safe=True)

            self.__mongod[0].kill(signal.SIGSTOP)

            while True:
                try:
                    yield conn.db.coll.insert({'y': 42}, safe=True, timeout=2)
                    self.fail("TimeExceeded not raised!")
                except TimeExceeded:
                    break  # this is what we should have returned
                except AutoReconnect:
                    pass

        finally:
            # Resume the suspended process before cleaning up the pool.
            self.__mongod[0].kill(signal.SIGCONT)
            yield conn.disconnect()
            self.flushLoggedErrors(AutoReconnect)
Example #23
0
    def test_TimeExceeded_insert(self):
        """An insert with ``timeout=2`` must raise TimeExceeded after mongod stops.

        ``AutoReconnect`` errors are ignored and the insert retried; the test
        fails if an insert completes without raising.
        """
        self.patch(_Connection, 'maxDelay', 5)

        # Create the pool before entering ``try`` so the ``finally`` clause
        # never touches an unbound ``conn`` if construction fails.
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, initial_delay=3)

        try:
            yield conn.db.coll.insert({'x': 42}, safe=True)

            yield self.__mongod[0].stop()

            while True:
                try:
                    yield conn.db.coll.insert({'y': 42}, safe=True, timeout=2)
                    self.fail("TimeExceeded not raised!")
                except TimeExceeded:
                    break  # this is what we should have returned
                except AutoReconnect:
                    pass

        finally:
            yield conn.disconnect()
            self.flushLoggedErrors(AutoReconnect)
def example():
    """Print up to ten documents from the ``foo.test`` collection.

    A TLS-enabled pool could be created instead by passing an
    ``ssl_context_factory`` argument to ``ConnectionPool``.
    """
    pool = yield ConnectionPool("mongodb://192.168.10.57:27017")
    # `test` collection of the `foo` database
    collection = pool.foo.test

    # fetch some documents
    documents = yield collection.find(limit=10)
    for document in documents:
        print(document)
Example #25
0
def example():
    """Print up to ten documents from ``foo.test`` over a TLS connection.

    The TLS context is built from ``./mongodb.key`` and ``./mongodb.crt``.
    """
    tls_ctx = ServerTLSContext(privateKeyFileName='./mongodb.key',
                               certificateFileName='./mongodb.crt')
    mongodb_uri = "mongodb://localhost:27017"

    mongo = yield ConnectionPool(mongodb_uri, ssl_context_factory=tls_ctx)

    foo = mongo.foo  # `foo` database
    test = foo.test  # `test` collection

    # fetch some documents
    docs = yield test.find(limit=10)
    for doc in docs:
        # Call form prints the same text on Python 2 and is valid on Python 3.
        print(doc)
Example #26
0
    def open_spider(self, spider):
        """Connect, resolve the collection, and create the configured indexes.

        If ``connection_kwargs['ssl_context_factory']`` is a class, it is
        replaced by an instance of it before connecting.
        """
        factory = self.connection_kwargs.get('ssl_context_factory')
        if inspect.isclass(factory):
            # ConnectionPool is given an instance of the factory, not the class.
            self.connection_kwargs['ssl_context_factory'] = factory()

        self._db_client = yield ConnectionPool(self.db_uri,
                                               **self.connection_kwargs)
        database = self._db_client[self.db_name]
        self._db = database
        self._coll = database[self.coll_name]
        # One query up front; presumably a reachability check before indexing.
        yield self._coll.find_one(timeout=True)
        for index_spec in self.db_index:
            index_filter = qf.sort(index_spec)
            yield self._coll.create_index(index_filter)
        storage_name = self.__class__.__name__
        self.logger.info('{storage} opened'.format(storage=storage_name))
Example #27
0
def insert_item(spider_name: str,
                item: Union[Item, dict],
                connection_pool: Optional[ConnectionPool] = None):
    """Insert ``item`` into the collection that belongs to ``spider_name``.

    ``ImageItem`` instances go to ``dync_settings.IMAGE_COLLECTION``; every
    other item goes to the collection named after the spider.

    :param spider_name: name of the spider, used as the collection name.
    :param item: the item to store; converted with ``dict(item)``.
    :param connection_pool: pool to reuse. When omitted, a temporary pool is
        opened from ``dync_settings['MONGO_URI']`` and closed again after the
        insert.
    """
    owns_pool = not connection_pool
    if owns_pool:
        mongo = yield ConnectionPool(dync_settings['MONGO_URI'])
    else:
        mongo = connection_pool

    try:
        db = mongo[dync_settings.MONGO_DB_NAME]

        if isinstance(item, ImageItem):
            collection = db[dync_settings.IMAGE_COLLECTION]
        else:
            collection = db[spider_name]

        # TODO: a spider may return different item types, which need to be
        # stored in different collections

        yield collection.insert(dict(item))
    finally:
        # Only close a pool created here; a caller-supplied pool stays open.
        if owns_pool:
            yield mongo.disconnect()
Example #28
0
def storeLocData():
    """Copy one tower location per person into ``TestingData.PeopleLocationData``.

    For every entry of ``GsmSimulatedData.PeopleHomeZones``, one packet from
    ``RawPackets`` with the same id and tower zone is looked up, and its tower
    latitude/longitude are stored together with the person's id and zone.

    Raises:
        IndexError: if no packet matches a person's id and home zone.
    """
    client = ConnectionPool("mongodb://localhost:27017")
    try:
        db1 = client.GsmSimulatedData
        db2 = client.TestingData
        col = db2.PeopleLocationData

        persons = {}
        pbar = ProgressBar()

        idToHome = yield db1.PeopleHomeZones.find()

        for val in idToHome:
            persons[val['id']] = {'zone': val['zone']}

        for pid in pbar(persons.keys()):
            person = persons[pid]
            packets = yield db1.RawPackets.find(
                spec={'id': pid, 'tower.zone': person['zone']}, limit=1)
            tower = packets[0]['tower']
            person['loc'] = [tower['lat'], tower['lon']]
            yield col.insert_one({'id': pid, 'zone': person['zone'], 'loc': person['loc']})
    finally:
        # The pool is local to this function, so close it on every exit path.
        yield client.disconnect()
Example #29
0
def connect_mongodb(host, port):
    """
    Run :py:func:`~.setup_mongodb`. If that succeeds, connect to MongoDB via
    ``txmongo``. Return a txmongo ConnectionPool.

    If creating the pool raises, the error is logged at critical level and the
    process exits with status 2.

    :param host: host to connect to MongoDB on.
    :type host: str
    :param port: port to connect to MongoDB on.
    :type port: int
    :return: MongoDB connection pool
    :rtype: txmongo.connection.ConnectionPool
    :raises SystemExit: if the connection pool cannot be created.
    """
    setup_mongodb(host, port)
    uri = 'mongodb://%s:%d' % (host, port)
    logger.info('Connecting to MongoDB via txmongo at %s', uri)
    try:
        conn = ConnectionPool(uri=uri)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not reported as connection errors.
        logger.critical('Error connecting to MongoDB at %s', uri, exc_info=True)
        raise SystemExit(2)
    return conn
 def connect(self):
     """Open a TLS-enabled pool to ``self.connect_url`` and store it in ``self.db``."""
     context_factory = self.ServerTLSContext(
         privateKeyFileName=self.privkey,
         certificateFileName=self.certfile)
     pool = ConnectionPool(self.connect_url,
                           ssl_context_factory=context_factory)
     self.db = pool
def getConnection(url=url):
    """Open a pool for ``url`` and publish it as the module-level ``cursor``.

    The default for ``url`` is whatever the module-level ``url`` held when
    this function was defined.
    """
    global cursor
    pool = yield ConnectionPool(url)
    cursor = pool
Example #32
0
 def __init__(self, db_name, pool_size=10, **kwargs):
     """Create the connection pool and select the ``db_name`` database.

     :param db_name: name of the database to bind to ``self._db``.
     :param pool_size: forwarded to ``ConnectionPool`` as ``pool_size``.
     :param kwargs: further keyword arguments forwarded to ``ConnectionPool``.
     """
     logging.debug("Creating DB ConnectionPool(pool_size=%d, %s)",
                   pool_size, kwargs)
     pool = ConnectionPool(pool_size=pool_size, **kwargs)
     self._pool = pool
     # NOTE(review): this reads ``self.pool`` although the attribute set above
     # is ``self._pool`` -- presumably a property defined elsewhere; confirm.
     self._db = self.pool[db_name]
Example #33
0
 def __check_reachable(self, port):
     """Send ``ismaster`` to the mongod on ``port`` over a short-lived pool.

     :param port: local port of the mongod instance to check.
     """
     uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(port)
     conn = ConnectionPool(uri)
     try:
         yield conn.admin.command("ismaster", check=False)
     finally:
         # Close the pool even when the command fails, so it is not leaked.
         yield conn.disconnect()
Example #34
0
class TestCancelIntegrated(unittest.TestCase):
    """Integration tests for cancelling Deferreds returned by collection operations.

    Each test uses a fresh ``ConnectionPool()`` and the ``db.coll`` collection,
    which is dropped again in ``tearDown``.
    """

    def setUp(self):
        # The pool is created with default arguments. The tests below rely on
        # it not being connected yet when the first operations are issued.
        self.conn = ConnectionPool()
        self.db = self.conn.db
        self.coll = self.db.coll

    @defer.inlineCallbacks
    def tearDown(self):
        """Drop the test collection and close the pool."""
        yield self.coll.drop()
        yield self.conn.disconnect()

    @defer.inlineCallbacks
    def test_integration(self):
        """Cancelled inserts issued before the pool connects never reach MongoDB."""
        # Our ConnectionPool is not actually connected yet, so on this
        # stage operations can be safely cancelled -- they won't be
        # sent to MongoDB at all. This test checks this.

        d1 = self.coll.insert_one({'x': 1})
        d2 = self.coll.insert_one({'x': 2})
        d3 = self.coll.insert_one({'x': 3})
        d4 = self.coll.insert_one({'x': 4})

        d1.cancel()
        d3.cancel()

        # Waiting for the last insert; d2 is asserted to have fired by then.
        yield d4

        self.failureResultOf(d1, defer.CancelledError)
        self.assertTrue(d2.called)
        self.failureResultOf(d3, defer.CancelledError)

        # Only the documents from the non-cancelled inserts must exist.
        docs = yield self.coll.distinct('x')
        self.assertEqual(set(docs), {2, 4})

    @defer.inlineCallbacks
    def test_remove(self):
        """A cancelled remove issued before the pool connects deletes nothing."""
        # Lets test cancellation of some dangerous operation for the peace
        # of mind. NB: remove can be cancelled only because ConnectionPool
        # is not connected yet.
        for i in range(10):
            # Deliberately not yielded: the inserts are issued before the
            # removes below without waiting for each one.
            self.coll.insert_one({'x': i})

        d1 = self.coll.remove({'x': {"$lt": 3}})
        d2 = self.coll.remove({'x': {"$gte": 3, "$lt": 6}})
        d3 = self.coll.remove({'x': {"$gte": 6, "$lt": 9}})

        d2.cancel()

        yield d3

        self.assertTrue(d1.called)
        self.failureResultOf(d2, defer.CancelledError)

        # 3, 4, 5 survive because their remove was cancelled; 9 was never
        # matched by any of the remove filters.
        x = yield self.coll.distinct('x')
        self.assertEqual(set(x), {3, 4, 5, 9})

    @defer.inlineCallbacks
    def test_no_way(self):
        """Cancelling after the pool is connected does not undo the insert."""
        # If ConnectionPool picks already active connection, the query is sent
        # to MongoDB immediately and there is no way to cancel it

        # Run one query first so that a connection is already active.
        yield self.coll.count()

        d = self.coll.insert({'x': 42})
        d.cancel()

        yield _delay(1)

        # The Deferred reports cancellation on the client side...
        self.failureResultOf(d, defer.CancelledError)

        # ...but the document was stored anyway.
        cnt = yield self.coll.count()
        self.assertEqual(cnt, 1)
Example #35
0
 def setUp(self):
     """Create a default pool and keep handles to ``db`` and ``db.coll``."""
     pool = ConnectionPool()
     self.conn = pool
     self.db = pool.db
     self.coll = self.db.coll