Esempio n. 1
0
    def test_skipped_oplog_entry_updates_checkpoint(self):
        """The OplogThread checkpoint must advance even for skipped entries.

        Only ``test.test`` is in the namespace set, so writes to
        ``test.ignored`` produce oplog entries the thread filters out;
        the checkpoint should still move past them.
        """
        repl_set = ReplicaSetSingle().start()
        conn = repl_set.client()
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(namespace_set=["test.test"]),
        )
        opman.start()

        # Insert a document into an included collection
        conn["test"]["test"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        # assert_soon polls until the condition holds or it times out.
        assert_soon(
            lambda: last_ts == opman.checkpoint,
            "OplogThread never updated checkpoint to non-skipped " "entry.",
        )
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)

        # Make sure that the oplog thread updates its checkpoint on every
        # oplog entry.
        conn["test"]["ignored"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        assert_soon(
            lambda: last_ts == opman.checkpoint,
            "OplogThread never updated checkpoint to skipped entry.",
        )
        # Tear down: stop the thread before stopping the replica set.
        opman.join()
        conn.close()
        repl_set.stop()
 def setUp(self):
     """Spin up a replica set and attach an OplogThread to its primary."""
     replica_set = ReplicaSet().start()
     client = replica_set.client()
     self.repl_set = replica_set
     self.primary_conn = client
     # The oplog collection lives in the primary's "local" database.
     self.oplog_coll = client.local['oplog.rs']
     self.opman = OplogThread(
         primary_client=client,
         doc_managers=(DocManager(),),
         oplog_progress_dict=LockingDict(),
     )
    def setUp(self):
        """Reset the progress file, start a replica set, and build the
        OplogThread under test (limited to the ``test.mc`` namespace)."""
        # Create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            # File may not exist on the first run; that's fine.
            pass
        open("oplog.timestamp", "w").close()

        # Start a replica set
        self.repl_set = ReplicaSet().start()
        # Connection to the replica set as a whole
        self.main_conn = self.repl_set.client()
        # Connection to the primary specifically
        self.primary_conn = self.repl_set.primary.client()
        # Connection to the secondary specifically
        self.secondary_conn = self.repl_set.secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman = OplogThread(primary_client=self.main_conn,
                                 doc_managers=(doc_manager, ),
                                 oplog_progress_dict=oplog_progress,
                                 ns_set=["test.mc"])
    def get_oplog_thread(cls):
        """ Set up connection with mongo. Returns oplog, the connection and
            oplog collection

            This function clears the oplog
        """
        is_sharded = True
        # Connect to whichever member is currently primary.
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        primary_conn['test']['test'].drop()
        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])

        # When MAIN is the primary's port there is no mongos in front,
        # so treat the deployment as unsharded.
        if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
            mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
            is_sharded = False
        oplog_coll = primary_conn['local']['oplog.rs']
        oplog_coll.drop()           # reset the oplog

        # Recreate the oplog as a small capped collection.
        primary_conn['local'].create_collection('oplog.rs', capped=True,
                                                size=1000000)
        namespace_set = ['test.test']
        doc_manager = DocManager()
        # NOTE(review): legacy positional OplogThread signature — keep the
        # argument order in sync with the constructor.
        oplog = OplogThread(primary_conn, mongos_addr, oplog_coll, is_sharded,
                            doc_manager, LockingDict(),
                            namespace_set, cls.AUTH_KEY, AUTH_USERNAME,
                            repl_set="demo-repl")

        return(oplog, primary_conn, oplog_coll)
Esempio n. 5
0
    def get_oplog_thread(cls):
        """ Set up connection with mongo.

        Returns oplog, the connection and oplog collection.
        This function clears the oplog.
        """
        # Connect to whichever member is currently primary.
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
        mongos = Connection(mongos_addr)
        # Clear test data that goes through the mongos.
        mongos['alpha']['foo'].drop()

        oplog_coll = primary_conn['local']['oplog.rs']
        oplog_coll.drop()  # reset the oplog

        # Recreate the oplog as a small capped collection.
        primary_conn['local'].create_collection('oplog.rs',
                                                capped=True,
                                                size=1000000)
        namespace_set = ['test.test', 'alpha.foo']
        doc_manager = DocManager()
        # NOTE(review): legacy positional OplogThread signature (sharded=True).
        oplog = OplogThread(primary_conn,
                            mongos_addr, oplog_coll, True, doc_manager,
                            LockingDict(), namespace_set, cls.AUTH_KEY,
                            AUTH_USERNAME)

        return (oplog, primary_conn, oplog_coll, mongos)
Esempio n. 6
0
    def get_new_oplog(cls):
        """ Set up connection with mongo. Returns oplog, the connection and
            oplog collection

            This function does not clear the oplog
        """
        is_sharded = True
        # Connect to whichever member is currently primary.
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
        # When MAIN is the primary's port there is no mongos in front,
        # so treat the deployment as unsharded.
        if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
            mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
            is_sharded = False
        oplog_coll = primary_conn['local']['oplog.rs']

        namespace_set = ['test.test']
        doc_manager = DocManager()
        oplog = OplogThread(primary_conn=primary_conn,
                            main_address=mongos_addr,
                            oplog_coll=oplog_coll,
                            is_sharded=is_sharded,
                            doc_manager=doc_manager,
                            oplog_progress_dict=LockingDict(),
                            namespace_set=namespace_set,
                            auth_key=cls.AUTH_KEY,
                            auth_username=AUTH_USERNAME,
                            repl_set="demo-repl")
        return (oplog, primary_conn, oplog.main_connection, oplog_coll)
Esempio n. 7
0
 def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
     """Rebuild the namespace config and a fresh OplogThread against it."""
     config = NamespaceConfig(
         namespace_set=include_ns,
         ex_namespace_set=exclude_ns,
         namespace_options=dest_mapping,
     )
     self.namespace_config = config
     self.opman = OplogThread(
         primary_client=self.primary_conn,
         doc_managers=(DocManager(),),
         oplog_progress_dict=LockingDict(),
         namespace_config=config,
     )
 def setUp(self):
     """Create an OplogThread with a default (unrestricted) namespace config."""
     config = NamespaceConfig()
     self.namespace_config = config
     self.opman = OplogThread(primary_client=self.primary_conn,
                              doc_managers=(DocManager(),),
                              oplog_progress_dict=LockingDict(),
                              namespace_config=config)
Esempio n. 9
0
 def initOplogThread(self, namespace_set=None, dest_mapping=None):
     """Create and start an OplogThread wired to a CommandLoggerDocManager.

     :param namespace_set: namespaces to include (defaults to an empty list)
     :param dest_mapping: namespace rename mapping (defaults to an empty dict)

     Defaults are None rather than ``[]`` / ``{}`` to avoid the shared
     mutable-default-argument pitfall: with literal defaults every call
     would reuse (and could see mutations of) the same list/dict object.
     """
     if namespace_set is None:
         namespace_set = []
     if dest_mapping is None:
         dest_mapping = {}
     self.docman = CommandLoggerDocManager()
     self.docman.command_helper = CommandHelper(namespace_set, dest_mapping)
     # collection_dump=False: start tailing the oplog without an
     # initial collection dump.
     self.opman = OplogThread(primary_client=self.primary_conn,
                              doc_managers=(self.docman, ),
                              oplog_progress_dict=self.oplog_progress,
                              namespace_set=namespace_set,
                              dest_mapping=dest_mapping,
                              collection_dump=False)
     self.opman.start()
Esempio n. 10
0
    def test_fields_and_exclude(self):
        """`fields` and `exclude_fields` are mutually exclusive on
        OplogThread: setting one while the other is active must raise
        InvalidConfiguration, both in the constructor and via setattr."""
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        # Test setting both to None in constructor
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=None,
                            exclude_fields=None)
        self._check_fields(opman, [], [], None)
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=None,
                            exclude_fields=exclude_fields)
        # Exclusion projection: each excluded field maps to 0.
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        # Test setting fields when exclude_fields is set
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", None)
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=None,
                            fields=fields)
        # Inclusion projection: each included field maps to 1.
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", exclude_fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", None)
        # Passing both at construction time must also fail.
        self.assertRaises(errors.InvalidConfiguration,
                          OplogThread,
                          self.primary_conn, (DocManager(), ),
                          LockingDict(),
                          self.dest_mapping_stru,
                          fields=fields,
                          exclude_fields=exclude_fields)
Esempio n. 11
0
 def setUp(self):
     """Start a single-node replica set and an OplogThread with an
     empty destination mapping."""
     rs = ReplicaSetSingle().start()
     client = rs.client()
     mapping = DestMapping([], [], {})
     self.repl_set = rs
     self.primary_conn = client
     self.oplog_coll = client.local['oplog.rs']
     self.dest_mapping_stru = mapping
     self.opman = OplogThread(primary_client=client,
                              doc_managers=(DocManager(),),
                              oplog_progress_dict=LockingDict(),
                              dest_mapping_stru=mapping)
Esempio n. 12
0
    def initOplogThread(self, namespace_set=None):
        """Wire a CommandLoggerDocManager into a started OplogThread.

        collection_dump is disabled so the thread tails the oplog
        without an initial dump.
        """
        config = NamespaceConfig(namespace_set=namespace_set)
        docman = CommandLoggerDocManager()
        docman.command_helper = CommandHelper(config)
        self.docman = docman
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(docman,),
            oplog_progress_dict=self.oplog_progress,
            namespace_config=config,
            collection_dump=False,
        )
        self.opman = opman
        opman.start()
Esempio n. 13
0
 def setUp(self):
     """Start a one-node replica set; include ``test.*`` namespaces and
     treat ``gridfs.*`` as GridFS collections."""
     rs = ReplicaSetSingle().start()
     client = rs.client()
     ns_options = {"test.*": True, "gridfs.*": {"gridfs": True}}
     self.repl_set = rs
     self.primary_conn = client
     self.oplog_coll = client.local["oplog.rs"]
     self.opman = OplogThread(
         primary_client=client,
         doc_managers=(DocManager(),),
         oplog_progress_dict=LockingDict(),
         namespace_config=NamespaceConfig(namespace_options=ns_options),
     )
Esempio n. 14
0
    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog, with gridfs collections
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        # Nothing to dump -> no timestamp.
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog with gridfs collections
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        # Insert 10 gridfs files
        for i in range(10):
            fs = gridfs.GridFS(self.primary_conn["gridfs"],
                               collection="test" + str(i))
            fs.put(b"hello world")
        # Insert 1000 documents
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        # 1000 documents + 10 gridfs files = 1010 docs in the target.
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(namespace_set=["test.test"]),
        )
        # Insert a document into an included collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-included collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many(
                [{"test": "1" * 1024} for _ in range(1024)])
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()
Esempio n. 15
0
    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        # Nothing to dump -> no timestamp.
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        dest_mapping_stru = DestMapping(["test.test"], [], {})
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=dest_mapping_stru,
            ns_set=set(["test.test"])
        )
        # Insert a document into a ns_set collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-ns_set collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many(
                [{"test": "1" * 1024} for _ in range(1024)])
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()
Esempio n. 16
0
 def setUp(self):
     """Start a replica set and build an OplogThread against its primary
     (legacy keyword-rich OplogThread signature)."""
     # start_replica_set returns ports; we only need the primary's.
     _, _, self.primary_p = start_replica_set('test-oplog-manager')
     self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
     self.oplog_coll = self.primary_conn.local['oplog.rs']
     self.opman = OplogThread(primary_conn=self.primary_conn,
                              main_address='%s:%d' %
                              (mongo_host, self.primary_p),
                              oplog_coll=self.oplog_coll,
                              is_sharded=False,
                              doc_manager=DocManager(),
                              oplog_progress_dict=LockingDict(),
                              namespace_set=None,
                              auth_key=None,
                              auth_username=None,
                              repl_set='test-oplog-manager')
Esempio n. 17
0
    def initOplogThread(self,
                        namespace_set=None,
                        ex_namespace_set=None,
                        dest_mapping=None):
        """Create and start an OplogThread using a fresh DestMapping.

        :param namespace_set: namespaces to include (defaults to [])
        :param ex_namespace_set: namespaces to exclude (defaults to [])
        :param dest_mapping: namespace rename mapping (defaults to {})

        Defaults are None rather than ``[]`` / ``{}`` to avoid the shared
        mutable-default-argument pitfall: literal defaults are created once
        and reused across every call.
        """
        if namespace_set is None:
            namespace_set = []
        if ex_namespace_set is None:
            ex_namespace_set = []
        if dest_mapping is None:
            dest_mapping = {}
        self.docman = CommandLoggerDocManager()
        # Replace the origin dest_mapping
        self.dest_mapping_stru = DestMapping(namespace_set, ex_namespace_set,
                                             dest_mapping)

        self.docman.command_helper = CommandHelper(self.dest_mapping_stru)
        # collection_dump=False: tail the oplog without an initial dump.
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(self.docman, ),
                                 oplog_progress_dict=self.oplog_progress,
                                 dest_mapping_stru=self.dest_mapping_stru,
                                 ns_set=namespace_set,
                                 ex_ns_set=ex_namespace_set,
                                 collection_dump=False)
        self.opman.start()
Esempio n. 18
0
    def get_new_oplog(cls):
        """ Set up connection with mongo.

        Returns oplog, the connection and oplog collection
        This function does not clear the oplog
        """
        # Connect to whichever member is currently primary.
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        mongos = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
        oplog_coll = primary_conn['local']['oplog.rs']

        namespace_set = ['test.test', 'alpha.foo']
        doc_manager = DocManager()
        # NOTE(review): legacy positional OplogThread signature (sharded=True).
        oplog = OplogThread(primary_conn, mongos, oplog_coll, True,
                            doc_manager, LockingDict(), namespace_set,
                            cls.AUTH_KEY, AUTH_USERNAME)

        return (oplog, primary_conn, oplog_coll, oplog.main_connection)
Esempio n. 19
0
    def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
        """Replace the destination mapping and the OplogThread under test."""
        # include_ns must not exist together with exclude_ns, and
        # dest_mapping must exist together with include_ns; those
        # invariants are covered by test_config.py, so no checks here.
        include_ns = [] if include_ns is None else include_ns
        exclude_ns = [] if exclude_ns is None else exclude_ns
        dest_mapping = {} if dest_mapping is None else dest_mapping

        mapping = DestMapping(include_ns, exclude_ns, dest_mapping)
        self.dest_mapping_stru = mapping
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=mapping,
            ns_set=include_ns,
            ex_ns_set=exclude_ns,
        )
Esempio n. 20
0
    def setUp(self):
        """Reset the progress file, start the 'rollbacks' replica set, and
        build an OplogThread limited to the ``test.mc`` namespace."""
        # Create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            # File may not exist on the first run; that's fine.
            pass
        open("config.txt", "w").close()

        # Start a replica set
        _, self.secondary_p, self.primary_p = start_replica_set('rollbacks')
        # Connection to the replica set as a whole
        self.main_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p),
                                     replicaSet='rollbacks')
        # Connection to the primary specifically
        self.primary_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p))
        # Connection to the secondary specifically
        self.secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.secondary_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman = OplogThread(
            primary_conn=self.main_conn,
            main_address='%s:%d' % (mongo_host, self.primary_p),
            oplog_coll=self.main_conn["local"]["oplog.rs"],
            is_sharded=False,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mc"],
            auth_key=None,
            auth_username=None,
            repl_set="rollbacks"
        )
    def setUp(self):
        """Reset the progress file, start the 'demo-repl' cluster, and
        build an OplogThread limited to the ``test.mc`` namespace."""
        # Create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            # File may not exist on the first run; that's fine.
            pass
        open("config.txt", "w").close()

        # Start a replica set
        start_cluster(sharded=False, use_mongos=False)
        # Connection to the replica set as a whole
        self.main_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"],
                                    replicaSet="demo-repl")
        # Connection to the primary specifically
        self.primary_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"])
        # Connection to the secondary specifically
        self.secondary_conn = Connection(
            "localhost:%s" % PORTS_ONE["SECONDARY"],
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman = OplogThread(
            primary_conn=self.main_conn,
            main_address="localhost:%s" % PORTS_ONE["PRIMARY"],
            oplog_coll=self.main_conn["local"]["oplog.rs"],
            is_sharded=False,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mc"],
            auth_key=None,
            auth_username=None,
            repl_set="demo-repl")
Esempio n. 22
0
 def setUp(self):
     """Build an OplogThread with an empty destination mapping."""
     mapping = DestMapping([], [], {})
     self.dest_mapping_stru = mapping
     self.opman = OplogThread(
         primary_client=self.primary_conn,
         doc_managers=(DocManager(),),
         oplog_progress_dict=LockingDict(),
         dest_mapping_stru=mapping,
     )
Esempio n. 23
0
    def test_fields_constructor(self):
        """The `fields` constructor argument controls projection:
        listed fields pass through filter_oplog_entry, `_id` is always
        implicitly included, and with no fields set nothing is filtered."""
        # Test with "_id" field in constructor
        fields = ["_id", "title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        # The extra fields must be stripped from the insert entry.
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test without "_id" field in constructor
        fields = ["title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        # "_id" is added automatically when omitted.
        fields.append('_id')
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        # No projection configured: the entry passes through unchanged.
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
Esempio n. 24
0
    def test_exclude_fields_constructor(self):
        """The `exclude_fields` constructor argument controls projection:
        listed fields are stripped by filter_oplog_entry, `_id` can never
        be excluded, and excluding only `_id` (or nothing) is a no-op.

        Fix: the second section previously built ``extra_fields`` from the
        stale ``extra_fields`` of the first section instead of the new
        ``exclude_fields`` list; the assertion only passed because duplicate
        dict keys collapse. It now builds the fixture from ``exclude_fields``.
        """
        # Test with the "_id" field in exclude_fields
        exclude_fields = ["_id", "title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        # "_id" may never be excluded; it is dropped from the list.
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        # Build the fixture from the current exclude_fields list (was a
        # stale reuse of the previous section's extra_fields).
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        # Excluding only "_id" leaves no effective exclusion.
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

        # Test with nothing set for exclude_fields
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=None)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        # No exclusion configured: the entry passes through unchanged.
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
Esempio n. 25
0
    def run(self):
        """Discovers the mongo cluster and creates a thread for each primary.

        Fix: corrected the misspelled shutdown log message
        ("recieved" -> "received"). All other behavior is unchanged.
        """
        self.main_conn = self.create_authed_client()
        LOG.always('Source MongoDB version: %s',
                   self.main_conn.admin.command('buildInfo')['version'])

        # Log name/version of every configured doc manager for support.
        for dm in self.doc_managers:
            name = dm.__class__.__module__
            module = sys.modules[name]
            version = 'unknown'
            if hasattr(module, '__version__'):
                version = module.__version__
            elif hasattr(module, 'version'):
                version = module.version
            LOG.always('Target DocManager: %s version: %s', name, version)

        self.read_oplog_progress()
        conn_type = None

        # "isdbgrid" only succeeds against a mongos; failure means we are
        # talking to a replica set member directly.
        try:
            self.main_conn.admin.command("isdbgrid")
        except pymongo.errors.OperationFailure:
            conn_type = "REPLSET"

        if conn_type == "REPLSET":
            # Make sure we are connected to a replica set
            is_master = self.main_conn.admin.command("isMaster")
            if "setName" not in is_master:
                LOG.error(
                    'No replica set at "%s"! A replica set is required '
                    'to run mongo-connector. Shutting down...' % self.address
                )
                return

            # Establish a connection to the replica set as a whole
            self.main_conn.close()
            self.main_conn = self.create_authed_client(
                replicaSet=is_master['setName'])

            # non sharded configuration
            oplog = OplogThread(
                self.main_conn, self.doc_managers, self.oplog_progress,
                self.dest_mapping, **self.kwargs)
            self.shard_set[0] = oplog
            LOG.info('MongoConnector: Starting connection thread %s' %
                     self.main_conn)
            oplog.start()

            # Watchdog loop: persist progress every second and shut down
            # if the single oplog thread dies.
            while self.can_run:
                shard_thread = self.shard_set[0]
                if not (shard_thread.running and shard_thread.is_alive()):
                    LOG.error("MongoConnector: OplogThread"
                              " %s unexpectedly stopped! Shutting down" %
                              (str(self.shard_set[0])))
                    self.oplog_thread_join()
                    for dm in self.doc_managers:
                        dm.stop()
                    return

                self.write_oplog_progress()
                time.sleep(1)

        else:       # sharded cluster
            while self.can_run:

                # Discover shards; spawn one OplogThread per shard primary.
                for shard_doc in retry_until_ok(self.main_conn.admin.command,
                                                'listShards')['shards']:
                    shard_id = shard_doc['_id']
                    if shard_id in self.shard_set:
                        # Already tracking this shard: just health-check it.
                        shard_thread = self.shard_set[shard_id]
                        if not (shard_thread.running and shard_thread.is_alive()):
                            LOG.error("MongoConnector: OplogThread "
                                      "%s unexpectedly stopped! Shutting "
                                      "down" %
                                      (str(self.shard_set[shard_id])))
                            self.oplog_thread_join()
                            for dm in self.doc_managers:
                                dm.stop()
                            return

                        self.write_oplog_progress()
                        time.sleep(1)
                        continue
                    try:
                        # Shard host strings look like "replSetName/h1,h2".
                        repl_set, hosts = shard_doc['host'].split('/')
                    except ValueError:
                        cause = "The system only uses replica sets!"
                        LOG.exception("MongoConnector: %s", cause)
                        self.oplog_thread_join()
                        for dm in self.doc_managers:
                            dm.stop()
                        return

                    shard_conn = self.create_authed_client(
                        hosts, replicaSet=repl_set)
                    oplog = OplogThread(
                        shard_conn, self.doc_managers, self.oplog_progress,
                        self.dest_mapping, mongos_client=self.main_conn,
                        **self.kwargs)
                    self.shard_set[shard_id] = oplog
                    msg = "Starting connection thread"
                    LOG.info("MongoConnector: %s %s" % (msg, shard_conn))
                    oplog.start()

        if self.signal is not None:
            LOG.info("received signal %s: shutting down...", self.signal)
        self.oplog_thread_join()
        self.write_oplog_progress()
Esempio n. 26
0
    def setUp(self):
        """Prepare a sharded cluster for each test.

        Drops leftover test data, connects to mongos and to each shard's
        primary/secondary, shards test.mcsharded on the "i" field,
        pre-splits the chunks between the two shards, and builds one
        OplogThread per shard primary.
        """
        self.cluster = ShardedCluster().start()

        # Client routed through mongos
        self.mongos_conn = self.cluster.client()

        # Direct clients for each shard's primary and one secondary
        self.shard1_conn = self.cluster.shards[0].client()
        self.shard2_conn = self.cluster.shards[1].client()
        self.shard1_secondary_conn = self.cluster.shards[0].secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)
        self.shard2_secondary_conn = self.cluster.shards[1].secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)

        # Remove data left over from any previous run
        self.mongos_conn["test"]["mcsharded"].drop()

        # Shard test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Split so that i < 1000 lives on shard1 and i >= 1000 on shard2
        self.mongos_conn.admin.command(
            bson.SON([("split", "test.mcsharded"), ("middle", {
                "i": 1000
            })]))

        # Stop the balancer so chunks stay where we put them
        self.mongos_conn.config.settings.update({"_id": "balancer"},
                                                {"$set": {
                                                    "stopped": True
                                                }},
                                                upsert=True)

        # Move each chunk onto its intended shard; the command fails
        # harmlessly when the chunk is already in the right place.
        for pivot, target in (({"i": 1}, 'demo-set-0'),
                              ({"i": 1000}, 'demo-set-1')):
            try:
                self.mongos_conn["admin"].command("moveChunk",
                                                  "test.mcsharded",
                                                  find=pivot,
                                                  to=target)
            except pymongo.errors.OperationFailure:
                pass

        # Verify the chunk layout with one probe document per shard
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            low = self.shard1_conn.test.mcsharded.find_one()
            high = self.shard2_conn.test.mcsharded.find_one()
            if None in (low, high):
                return False
            return low['i'] == 1 and high['i'] == 1000

        assert_soon(chunks_moved,
                    max_tries=120,
                    message='chunks not moved? doc1=%r, doc2=%r' %
                    (self.shard1_conn.test.mcsharded.find_one(),
                     self.shard2_conn.test.mcsharded.find_one()))
        self.mongos_conn.test.mcsharded.remove()

        # Start from a fresh oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # One OplogThread per shard primary, sharing a doc manager and a
        # progress dict
        shared_dm = DocManager()
        progress = LockingDict()
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(shared_dm, ),
            oplog_progress_dict=progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(shared_dm, ),
            oplog_progress_dict=progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)
    def setUp(self):
        """Bring up a sharded cluster and the per-shard OplogThreads.

        Cleans test data, connects to mongos and to both shard replica
        sets, shards test.mcsharded on the "i" field, pre-splits the
        chunks, and creates an OplogThread for each shard primary.
        """
        # Start the cluster with a mongos on port 27217
        self.mongos_p = start_cluster()

        # Client routed through mongos
        mongos_address = '%s:%d' % (mongo_host, self.mongos_p)
        self.mongos_conn = MongoClient(mongos_address)

        # Discover shard member ports, then connect to each primary and
        # one secondary per shard
        ports1 = get_shard(self.mongos_p, 0)
        ports2 = get_shard(self.mongos_p, 1)
        self.shard1_prim_p = ports1['primary']
        self.shard1_scnd_p = ports1['secondaries'][0]
        self.shard2_prim_p = ports2['primary']
        self.shard2_scnd_p = ports2['secondaries'][0]
        self.shard1_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard1_prim_p),
                                       replicaSet="demo-set-0")
        self.shard2_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard2_prim_p),
                                       replicaSet="demo-set-1")
        self.shard1_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard1_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )
        self.shard2_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard2_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Remove data left over from any previous run
        self.mongos_conn["test"]["mcsharded"].drop()

        # Shard test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Split so that i < 1000 lives on shard1 and i >= 1000 on shard2
        self.mongos_conn.admin.command(bson.SON([
            ("split", "test.mcsharded"),
            ("middle", {"i": 1000})
        ]))

        # Stop the balancer so chunks stay where we put them
        self.mongos_conn.config.settings.update(
            {"_id": "balancer"},
            {"$set": {"stopped": True}},
            upsert=True
        )

        # Move each chunk onto its intended shard; the command fails
        # harmlessly when the chunk is already on the correct shard.
        for pivot, target in (({"i": 1}, "demo-set-0"),
                              ({"i": 1000}, "demo-set-1")):
            try:
                self.mongos_conn["admin"].command(
                    "moveChunk",
                    "test.mcsharded",
                    find=pivot,
                    to=target
                )
            except pymongo.errors.OperationFailure:
                pass        # chunk may already be on the correct shard

        # Verify the chunk layout with one probe document per shard
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            low = self.shard1_conn.test.mcsharded.find_one()
            high = self.shard2_conn.test.mcsharded.find_one()
            if None in (low, high):
                return False
            return low['i'] == 1 and high['i'] == 1000
        assert_soon(chunks_moved)
        self.mongos_conn.test.mcsharded.remove()

        # Start from a fresh oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # One OplogThread per shard primary, sharing a doc manager and a
        # progress dict
        shared_dm = DocManager()
        progress = LockingDict()
        self.opman1 = OplogThread(
            primary_conn=self.shard1_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard1_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=shared_dm,
            oplog_progress_dict=progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )
        self.opman2 = OplogThread(
            primary_conn=self.shard2_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard2_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=shared_dm,
            oplog_progress_dict=progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )
Esempio n. 28
0
    def run(self):
        """Discovers the mongo cluster and creates a thread for each primary.

        Connects to ``self.address``, determines whether it is a mongos
        (sharded cluster) or a replica set member, starts one OplogThread
        per primary, and then loops writing oplog progress until
        ``self.can_run`` is cleared or an OplogThread stops unexpectedly.
        """
        main_conn = MongoClient(self.address)
        if self.auth_key is not None:
            main_conn['admin'].authenticate(self.auth_username, self.auth_key)
        self.read_oplog_progress()
        conn_type = None

        # "isdbgrid" succeeds only against a mongos; an OperationFailure
        # means we are connected to a replica set member instead.
        try:
            main_conn.admin.command("isdbgrid")
        except pymongo.errors.OperationFailure:
            conn_type = "REPLSET"

        if conn_type == "REPLSET":
            # Make sure we are connected to a replica set
            is_master = main_conn.admin.command("isMaster")
            if "setName" not in is_master:
                logging.error(
                    'No replica set at "%s"! A replica set is required '
                    'to run mongo-connector. Shutting down...' % self.address)
                return

            # Establish a connection to the replica set as a whole
            main_conn.disconnect()
            main_conn = MongoClient(self.address,
                                    replicaSet=is_master['setName'])
            if self.auth_key is not None:
                main_conn.admin.authenticate(self.auth_username, self.auth_key)

            # non sharded configuration
            oplog_coll = main_conn['local']['oplog.rs']

            oplog = OplogThread(primary_conn=main_conn,
                                main_address=self.address,
                                oplog_coll=oplog_coll,
                                is_sharded=False,
                                doc_manager=self.doc_managers,
                                oplog_progress_dict=self.oplog_progress,
                                namespace_set=self.ns_set,
                                auth_key=self.auth_key,
                                auth_username=self.auth_username,
                                repl_set=is_master['setName'],
                                collection_dump=self.collection_dump,
                                batch_size=self.batch_size,
                                fields=self.fields,
                                dest_mapping=self.dest_mapping,
                                continue_on_error=self.continue_on_error)
            self.shard_set[0] = oplog
            logging.info('MongoConnector: Starting connection thread %s' %
                         main_conn)
            oplog.start()

            # Monitor the single oplog thread; shut down cleanly if it
            # stops on its own.
            while self.can_run:
                if not self.shard_set[0].running:
                    logging.error("MongoConnector: OplogThread"
                                  " %s unexpectedly stopped! Shutting down" %
                                  (str(self.shard_set[0])))
                    self.oplog_thread_join()
                    for dm in self.doc_managers:
                        dm.stop()
                    return

                self.write_oplog_progress()
                time.sleep(1)

        else:  # sharded cluster
            # Poll the config database for shards, spawning a thread for
            # each new shard and watching the existing ones.
            while self.can_run:

                for shard_doc in main_conn['config']['shards'].find():
                    shard_id = shard_doc['_id']
                    if shard_id in self.shard_set:
                        if not self.shard_set[shard_id].running:
                            logging.error("MongoConnector: OplogThread "
                                          "%s unexpectedly stopped! Shutting "
                                          "down" %
                                          (str(self.shard_set[shard_id])))
                            self.oplog_thread_join()
                            for dm in self.doc_managers:
                                dm.stop()
                            return

                        self.write_oplog_progress()
                        time.sleep(1)
                        continue
                    # Shard host strings look like "replSetName/host1,host2";
                    # anything else is an unsupported standalone shard.
                    try:
                        repl_set, hosts = shard_doc['host'].split('/')
                    except ValueError:
                        cause = "The system only uses replica sets!"
                        logging.error("MongoConnector: %s", cause)
                        self.oplog_thread_join()
                        for dm in self.doc_managers:
                            dm.stop()
                        return

                    shard_conn = MongoClient(hosts, replicaSet=repl_set)
                    oplog_coll = shard_conn['local']['oplog.rs']

                    oplog = OplogThread(
                        primary_conn=shard_conn,
                        main_address=self.address,
                        oplog_coll=oplog_coll,
                        is_sharded=True,
                        doc_manager=self.doc_managers,
                        oplog_progress_dict=self.oplog_progress,
                        namespace_set=self.ns_set,
                        auth_key=self.auth_key,
                        auth_username=self.auth_username,
                        collection_dump=self.collection_dump,
                        batch_size=self.batch_size,
                        fields=self.fields,
                        dest_mapping=self.dest_mapping,
                        continue_on_error=self.continue_on_error)
                    self.shard_set[shard_id] = oplog
                    msg = "Starting connection thread"
                    logging.info("MongoConnector: %s %s" % (msg, shard_conn))
                    oplog.start()

        # Normal shutdown path: join all threads and persist progress
        self.oplog_thread_join()
        self.write_oplog_progress()
Esempio n. 29
0
    def run(self):
        """Discovers the mongo cluster and creates a thread for each primary.

        Connects to ``self.address``, determines whether it is a mongos
        (sharded cluster) or a replica set member, starts one OplogThread
        per primary, and then loops writing oplog progress until
        ``self.can_run`` is cleared or an OplogThread stops unexpectedly.
        """
        main_conn = MongoClient(self.address,
                                tz_aware=self.tz_aware,
                                **self.ssl_kwargs)
        if self.auth_key is not None:
            main_conn['admin'].authenticate(self.auth_username, self.auth_key)
        self.read_oplog_progress()
        conn_type = None

        # "isdbgrid" succeeds only against a mongos; an OperationFailure
        # means we are connected to a replica set member instead.
        try:
            main_conn.admin.command("isdbgrid")
        except pymongo.errors.OperationFailure:
            conn_type = "REPLSET"

        if conn_type == "REPLSET":
            # Make sure we are connected to a replica set
            is_master = main_conn.admin.command("isMaster")
            if "setName" not in is_master:
                LOG.error('No replica set at "%s"! A replica set is required '
                          'to run mongo-connector. Shutting down...' %
                          self.address)
                return

            # Establish a connection to the replica set as a whole
            main_conn.close()
            main_conn = MongoClient(self.address,
                                    replicaSet=is_master['setName'],
                                    tz_aware=self.tz_aware,
                                    **self.ssl_kwargs)
            if self.auth_key is not None:
                main_conn.admin.authenticate(self.auth_username, self.auth_key)

            # non sharded configuration
            oplog = OplogThread(main_conn, self.doc_managers,
                                self.oplog_progress, **self.kwargs)
            self.shard_set[0] = oplog
            LOG.info('MongoConnector: Starting connection thread %s' %
                     main_conn)
            oplog.start()

            # Monitor the single oplog thread; shut down cleanly if it
            # stops on its own.
            while self.can_run:
                if not self.shard_set[0].running:
                    LOG.error("MongoConnector: OplogThread"
                              " %s unexpectedly stopped! Shutting down" %
                              (str(self.shard_set[0])))
                    self.oplog_thread_join()
                    for dm in self.doc_managers:
                        dm.stop()
                    return

                self.write_oplog_progress()
                time.sleep(1)

        else:  # sharded cluster
            # Poll the config database for shards, spawning a thread for
            # each new shard and watching the existing ones.
            while self.can_run:

                for shard_doc in main_conn['config']['shards'].find():
                    shard_id = shard_doc['_id']
                    if shard_id in self.shard_set:
                        if not self.shard_set[shard_id].running:
                            LOG.error("MongoConnector: OplogThread "
                                      "%s unexpectedly stopped! Shutting "
                                      "down" % (str(self.shard_set[shard_id])))
                            self.oplog_thread_join()
                            for dm in self.doc_managers:
                                dm.stop()
                            return

                        self.write_oplog_progress()
                        time.sleep(1)
                        continue
                    # Shard host strings look like "replSetName/host1,host2";
                    # anything else is an unsupported standalone shard.
                    try:
                        repl_set, hosts = shard_doc['host'].split('/')
                    except ValueError:
                        cause = "The system only uses replica sets!"
                        LOG.exception("MongoConnector: %s", cause)
                        self.oplog_thread_join()
                        for dm in self.doc_managers:
                            dm.stop()
                        return

                    shard_conn = MongoClient(hosts,
                                             replicaSet=repl_set,
                                             tz_aware=self.tz_aware,
                                             **self.ssl_kwargs)
                    if self.auth_key is not None:
                        shard_conn['admin'].authenticate(
                            self.auth_username, self.auth_key)
                    oplog = OplogThread(shard_conn, self.doc_managers,
                                        self.oplog_progress, **self.kwargs)
                    self.shard_set[shard_id] = oplog
                    msg = "Starting connection thread"
                    LOG.info("MongoConnector: %s %s" % (msg, shard_conn))
                    oplog.start()

        # Normal shutdown path: join all threads and persist progress
        self.oplog_thread_join()
        self.write_oplog_progress()
Esempio n. 30
0
    def test_fields(self):
        """Field filtering: only configured fields survive filter_oplog_entry."""

        def make_opman(include_fields):
            # Build an OplogThread projecting onto include_fields.
            return OplogThread(primary_client=self.primary_conn,
                               doc_managers=(DocManager(), ),
                               oplog_progress_dict=LockingDict(),
                               fields=include_fields)

        def project(opman, field_names):
            # Push an insert entry holding field_names through the
            # oplog filter and return the surviving document.
            entry = {'op': 'i', 'o': dict((f, 1) for f in field_names)}
            return opman.filter_oplog_entry(entry)['o']

        # Test with "_id" field in constructor
        fields = ["_id", "title", "content", "author"]
        opman = make_opman(fields)
        self.assertEqual(set(fields), opman._fields)
        self.assertEqual(sorted(fields), sorted(opman.fields))
        self.assertEqual(dict((f, 1) for f in fields),
                         project(opman, fields + ['extra1', 'extra2']))

        # Test without "_id" field in constructor
        fields = ["title", "content", "author"]
        opman = make_opman(fields)
        self.assertEqual(set(fields), opman._fields)
        self.assertEqual(sorted(fields), sorted(opman.fields))
        self.assertEqual(dict((f, 1) for f in fields),
                         project(opman, fields + ['extra1', 'extra2']))

        # Test with only "_id" field
        fields = ["_id"]
        opman = make_opman(fields)
        self.assertEqual(set(fields), opman._fields)
        self.assertEqual(fields, opman.fields)
        self.assertEqual({'_id': 1},
                         project(opman, fields + ['extra1', 'extra2']))

        # Test with no fields set: everything passes through untouched
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(), ),
            oplog_progress_dict=LockingDict(),
        )
        self.assertEqual(set([]), opman._fields)
        self.assertEqual(None, opman.fields)
        extra_fields = ['_id', 'extra1', 'extra2']
        self.assertEqual(dict((f, 1) for f in extra_fields),
                         project(opman, extra_fields))