Example #1
0
 def __init__(self, uri=None):
     """Create the connection pool and initialise connection bookkeeping.

     :param uri: optional MongoDB URI passed to ``ConnectionPool``; when it
         is falsy the pool is built with its own defaults.
     """
     pool = ConnectionPool(uri=uri) if uri else ConnectionPool()
     # ``Words`` looked up under ``Youdao`` on the pool.
     self.db = pool.Youdao.Words
     self.connections = 0
     self.MAX_CONNECTIONS = 1024
 def open_spider(self, spider):
     """Select the item processor and storage handles for ``spider``.

     A ``ConnectionPool`` is always created from the ``"MONGO"`` setting.
     Depending on ``spider.name`` the method then binds ``self._processor``
     and, for the Mongo-backed spiders, ``self.db`` (plus the two room
     collections for ``hcom``). Unknown spiders get a no-op processor.

     :param spider: the spider being opened; only its ``name`` is read.
     """
     self.m = ConnectionPool(self.settings.get("MONGO"))
     name = spider.name

     if name == "jetsetter":
         self.db = self.m.scripture.jsets
         self._processor = self.process_jset_item
         return

     if name == "ustravelzoo":
         self._processor = self.process_tzoo_item
         self.db = self.m.scripture.tzoos
         return

     if name == "hcom":
         self._processor = self.process_hotels_item
         self.db = self.m.scripture.hotels
         # Room data is kept per language, separate from the hotels.
         self.rooms_zh_db = self.m.scripture.hcom.zh.rooms
         self.rooms_en_db = self.m.scripture.hcom.en.rooms
         return

     if name == "eventbrite":
         self._processor = self.process_eventbrite_item
         self.db = self.m.scripture.eventbrites
         return

     if name == "booking":
         self._processor = self.process_booking_item
         self.db = self.m.scripture.bookings
         return

     if name == "distributed_spider":
         self._processor = self.distributed_spider
         # NOTE(review): ``redis_url`` and ``db`` are not defined in this
         # method -- presumably module-level names; confirm.
         self.rds = redis.StrictRedis(
             host=redis_url.host,
             port=redis_url.port,
             db=db,
             password=redis_url.password,
         )
         return

     # Any other spider: accept every call and do nothing.
     self._processor = lambda *args, **kwargs: None
Example #3
0
 def __init__(self, crawler):
     """Open a connection pool and select the database named in settings.

     :param crawler: object exposing ``settings``; ``MONGODB_URI`` and
         ``MONGODB_DB`` are read from it with local defaults.
     """
     self.crawler = crawler
     settings = self.crawler.settings
     uri = settings.get('MONGODB_URI', 'mongodb://localhost')
     database_name = settings.get('MONGODB_DB', 'scrapy_mongo_pipeline')
     self.ctx = ConnectionPool(uri)
     self.db = self.ctx[database_name]
 def open_spider(self, spider):
     """Connect to the store, bind database/collection and create indexes.

     Generator-style coroutine (presumably wrapped by
     ``inlineCallbacks``; the decorator is not visible here).

     :param spider: the spider being opened (not used by this method).
     """
     self._db_client = yield ConnectionPool(self.db_uri)
     self._db = self._db_client[self.db_name]
     self._coll = self._db[self.coll_name]

     # Issue one query before building indexes -- presumably a
     # connectivity check; confirm.
     yield self._coll.find_one(timeout=True)

     for index_spec in self.db_index:
         index_filter = qf.sort(index_spec)
         yield self._coll.create_index(index_filter)

     storage_name = self.__class__.__name__
     logger.info('{storage} opened'.format(storage=storage_name))
    def open_spider(self, spider: Spider):
        """Connect to MongoDB, bind the configured collection, build indexes.

        The database and collection names come from ``self.settings`` under
        the ``MONGODB_DATABASE`` and ``MONGODB_COLLECTION`` keys.

        :param spider: the spider being opened; forwarded to
            ``self.create_index``.
        """
        self.cnx = ConnectionPool(self.uri, codec_options=self.codec_options)
        # Subscript access keeps working for names that collide with
        # attributes of the pool/database objects.
        self.db = self.cnx[self.settings[MONGODB_DATABASE]]
        self.coll = self.db[self.settings[MONGODB_COLLECTION]]
        # ``with_options`` hands back a new collection object rather than
        # changing the receiver, so the result has to be kept.
        self.coll = self.coll.with_options(codec_options=self.codec_options)

        yield self.create_index(spider)
        logger.info('Spider opened: Open the connection to MongoDB: %s',
                    self.uri)
Example #6
0
    def test_AutoReconnect_from_primary_step_down(self):
        """Forcing the primary to step down must surface ``AutoReconnect``."""
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, max_delay=5)

        try:
            # this will force primary to step down, triggering an AutoReconnect that bubbles up
            # through the connection pool to the client
            command = conn.admin.command(SON([('replSetStepDown', 86400), ('force', 1)]))
            # assertFailure only attaches callbacks and returns a Deferred;
            # it has to be waited on or the check never affects the test.
            yield self.assertFailure(command, AutoReconnect)
        finally:
            # Always release the pool, even when the assertion fails.
            yield conn.disconnect()
def loadHomeZones():
    """Load the id -> home-zone mapping from ``PeopleHomeZones``.

    Generator-style coroutine: rebinds the module-level ``client`` to a new
    ``ConnectionPool`` and hands the resulting dict back via
    ``returnValue``.
    """
    global client
    client = yield ConnectionPool(url)
    home_zones = client.GsmSimulatedData.PeopleHomeZones
    print("\033[92mLoading Home Zone data.....\033[0m")
    records = yield home_zones.find()
    returnValue({record['id']: record['zone'] for record in records})
 def open_spider(self, spider):
     """Open the MongoDB pool, bind the raw collection and index it.

     Asynchronous counterpart of a blocking ``pymongo.MongoClient`` setup:
     the connection, database and collection come from the
     ``MONGODB_URI``, ``MONGODB_DB`` and ``MONGODB_COLL_RAW`` settings.

     :param spider: the spider being opened (not used by this method).
     """
     self.client = yield ConnectionPool(self.settings['MONGODB_URI'])
     self.db = self.client[self.settings['MONGODB_DB']]
     self.coll = self.db[self.settings['MONGODB_COLL_RAW']]
     # Wait for index creation so failures surface here instead of being
     # dropped with an un-awaited result.
     yield self.coll.create_index(sort([('request_url', 1)]))
Example #9
0
    def test_ConnectionUrlParams(self):
        """``w`` and ``j`` given in the URI must reach the mocked call."""
        uri = "mongodb://{0}:{1}/?w=2&j=true".format(mongo_host, mongo_port)
        conn = ConnectionPool(uri)
        coll = conn.mydb.mycol

        try:
            with self.mock_gle() as gle_mock:
                yield coll.insert({'x': 42})
                gle_mock.assert_called_once_with('mydb', w=2, j=True)
        finally:
            # Clean up the test collection before releasing the pool.
            yield coll.drop()
            yield conn.disconnect()
Example #10
0
    def open_spider(self, spider: Spider):
        """Connect to MongoDB and bind the seeds collection.

        Database and collection names are read from ``self.settings`` under
        ``SEEDS_MONGODB_DATABASE`` / ``SEEDS_MONGODB_COLLECTION``; both
        default to ``'seeds'``.

        :param spider: the spider being opened (not used by this method).
        """
        self.cnx = yield ConnectionPool(self.uri,
                                        codec_options=self.codec_options)
        # Subscript access keeps working for names that collide with
        # attributes of the pool/database objects.
        self.db = self.cnx[self.settings.get(SEEDS_MONGODB_DATABASE, 'seeds')]
        seeds = self.db[self.settings.get(SEEDS_MONGODB_COLLECTION, 'seeds')]
        # ``with_options`` hands back a new collection object rather than
        # changing the receiver, so the result has to be kept.
        self.coll = seeds.with_options(codec_options=self.codec_options)

        logger.info('Spider opened: Open the connection to MongoDB: %s',
                    self.uri)
Example #11
0
    def test_SlaveOk(self):
        """Reads with ``QUERY_SLAVE_OK`` work on ``ports[1]``; writes fail."""
        uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(self.ports[1])
        conn = ConnectionPool(uri)
        try:
            found = yield conn.db.coll.find(flags=QUERY_SLAVE_OK)
            self.assertEqual(found, [])

            status = yield conn.admin.command("serverStatus")
            version = [int(piece) for piece in status["version"].split('.')]

            # The error type expected for the rejected write depends on the
            # server version.
            if version > [4, 2]:
                expected_error = AutoReconnect
            else:
                expected_error = OperationFailure

            write = conn.db.coll.insert({'x': 42})
            yield self.assertFailure(write, expected_error)
        finally:
            yield conn.disconnect()
def example():
    """Connect to a MongoDB server and print up to ten documents.

    Generator-style coroutine reading from collection ``test`` of database
    ``foo``.
    """
    # TLS variant:
    # tls_ctx = ServerTLSContext(privateKeyFileName='./mongodb.key', certificateFileName='./mongodb.crt')
    # mongo = yield ConnectionPool(mongodb_uri, ssl_context_factory=tls_ctx)
    mongodb_uri = "mongodb://192.168.10.57:27017"
    mongo = yield ConnectionPool(mongodb_uri)

    collection = mongo.foo.test  # `foo` database, `test` collection

    # To seed data first:
    # yield test.insert({"title": "sb", "content": "sb"})
    documents = yield collection.find(limit=10)
    for document in documents:
        print(document)
Example #13
0
def insert_item(spider_name: str,
                item: Union[Item, dict],
                connection_pool: Optional[ConnectionPool] = None):
    """Insert ``item`` into the collection that belongs to ``spider_name``.

    Generator-style coroutine. ``ImageItem`` instances are routed to
    ``dync_settings.IMAGE_COLLECTION`` instead of the per-spider collection.

    :param spider_name: name of the spider; used as the collection name.
    :param item: the scraped item (an instance, not a class); stored as
        ``dict(item)``.
    :param connection_pool: pool to reuse. When omitted, a temporary pool is
        opened from ``dync_settings['MONGO_URI']`` and closed again once the
        insert has finished.
    """
    owns_pool = not connection_pool
    if owns_pool:
        mongo = yield ConnectionPool(dync_settings['MONGO_URI'])
    else:
        mongo = connection_pool

    try:
        db = mongo[dync_settings.MONGO_DB_NAME]
        if isinstance(item, ImageItem):
            collection = db[dync_settings.IMAGE_COLLECTION]
        else:
            collection = db[spider_name]

        # TODO: a spider may return different item types, which need to be
        # stored in different collections.

        yield collection.insert(dict(item))
    finally:
        # Only close what this call opened; a caller-supplied pool stays
        # under the caller's control.
        if owns_pool:
            yield mongo.disconnect()
Example #14
0
    def setUp(self):
        """Start one ``Mongod`` per port and wait for the replica set.

        Generator-style coroutine. After the processes are reachable, the
        replica set is initiated through ``ports[0]`` and polled every
        ``__ping_interval`` seconds until a primary is elected and every
        member is PRIMARY or SECONDARY.

        :raises Exception: when the replica set is not ready within
            ``__init_timeout`` seconds (``tearDown`` is run first).
        """
        self.__mongod = [
            Mongod(port=p, replset=self.rsname) for p in self.ports
        ]
        yield defer.gatherResults([mongo.start() for mongo in self.__mongod])

        yield defer.gatherResults(
            [self.__check_reachable(port) for port in self.ports])

        master_uri = "mongodb://localhost:{0}/?readPreference=secondaryPreferred".format(
            self.ports[0])
        master = ConnectionPool(master_uri)
        try:
            yield master.admin.command("replSetInitiate", self.rsconfig)

            ready = False
            ok_states = {"PRIMARY", "SECONDARY"}
            n_tries = int(self.__init_timeout / self.__ping_interval)
            for _ in range(n_tries):
                yield self.__sleep(self.__ping_interval)

                # My practice shows that we need to query both ismaster and replSetGetStatus
                # to be sure that replica set is up and running, primary is elected and all
                # secondaries are in sync and ready to become new primary

                ismaster_req = master.admin.command("ismaster", check=False)
                replstatus_req = master.admin.command("replSetGetStatus",
                                                      check=False)
                ismaster, replstatus = yield defer.gatherResults(
                    [ismaster_req, replstatus_req])

                initialized = replstatus["ok"]
                states_ready = all(m["stateStr"] in ok_states
                                   for m in replstatus.get("members", []))
                ready = initialized and ismaster["ismaster"] and states_ready

                if ready:
                    break

            if not ready:
                yield self.tearDown()
                raise Exception(
                    "ReplicaSet initialization took more than {0}s".format(
                        self.__init_timeout))
        finally:
            # Release the helper pool on every path, including failures.
            yield master.disconnect()
Example #15
0
def storeLocData():
    """Copy one tower location per person into ``PeopleLocationData``.

    Generator-style coroutine. Every person in
    ``GsmSimulatedData.PeopleHomeZones`` is matched with one packet from
    ``GsmSimulatedData.RawPackets`` whose tower zone equals the person's
    home zone. The result is inserted as ``{'id', 'zone', 'loc'}`` into
    ``TestingData.PeopleLocationData``.

    :raises IndexError: when a person has no packet in the home zone.
    """
    client = ConnectionPool("mongodb://localhost:27017")
    try:
        db1 = client.GsmSimulatedData
        db2 = client.TestingData
        col = db2.PeopleLocationData

        pbar = ProgressBar()

        idToHome = yield db1.PeopleHomeZones.find()
        persons = {val['id']: {'zone': val['zone']} for val in idToHome}

        for pid in pbar(persons.keys()):
            zone = persons[pid]['zone']
            pac = yield db1.RawPackets.find(spec={'id': pid, 'tower.zone': zone}, limit=1)
            tower = pac[0]['tower']
            persons[pid]['loc'] = [tower['lat'], tower['lon']]
            yield col.insert_one({'id': pid, 'zone': zone, 'loc': persons[pid]['loc']})
    finally:
        # The pool is local to this function, so close it on every path.
        yield client.disconnect()
Example #16
0
    def test_AutoReconnect(self):
        """Reads keep being retried while the first mongod is suspended."""
        # Build the pool before entering ``try`` so that ``finally`` never
        # touches an unbound ``conn`` and hides the real error.
        uri = "mongodb://localhost:{0}/?w={1}".format(self.ports[0], len(self.ports))
        conn = ConnectionPool(uri, max_delay=5)

        try:
            yield conn.db.coll.insert({'x': 42}, safe=True)

            # Freeze the first mongod process.
            self.__mongod[0].kill(signal.SIGSTOP)

            while True:
                try:
                    result = yield conn.db.coll.find_one()
                except AutoReconnect:
                    # Expected while the pool is reconnecting; retry.
                    continue
                else:
                    self.assertEqual(result['x'], 42)
                    break

        finally:
            # Resume the suspended process before the pool is closed.
            self.__mongod[0].kill(signal.SIGCONT)
            yield conn.disconnect()
            self.flushLoggedErrors(AutoReconnect)
Example #17
0
def connect_mongodb(host, port):
    """
    Run :py:func:`~.setup_mongodb`. If that succeeds, connect to MongoDB via
    ``txmongo``. Return a txmongo ConnectionPool.

    :param host: host to connect to MongoDB on.
    :type host: str
    :param port: port to connect to MongoDB on.
    :type port: int
    :return: MongoDB connection pool
    :rtype: txmongo.connection.ConnectionPool
    :raises SystemExit: with exit code 2 when the pool cannot be created.
    """
    setup_mongodb(host, port)
    uri = 'mongodb://%s:%d' % (host, port)
    logger.info('Connecting to MongoDB via txmongo at %s', uri)
    try:
        conn = ConnectionPool(uri=uri)
    except Exception:
        # Only real errors are turned into an exit; KeyboardInterrupt and
        # SystemExit raised underneath pass through unchanged.
        logger.critical('Error connecting to MongoDB at %s', uri, exc_info=True)
        raise SystemExit(2)
    return conn