def test_init_cursor(self):
    """Test init_cursor in oplog_manager.

    Exercises init_cursor with an empty oplog, with a single oplog
    entry, with an oplog progress/config file present, and with the
    collection-dump option turned on and off.
    """
    test_oplog, primary_conn, search_ts = self.get_oplog_thread()
    test_oplog.checkpoint = None  # needed for these tests

    # initial tests with no config file and empty oplog
    self.assertEqual(test_oplog.init_cursor(), None)

    # no config, single oplog entry
    primary_conn['test']['test'].insert({'name': 'paulie'})
    search_ts = test_oplog.get_last_oplog_timestamp()
    cursor = test_oplog.init_cursor()
    self.assertEqual(cursor.count(), 1)
    self.assertEqual(test_oplog.checkpoint, search_ts)

    # with config file, assert that size != 0
    # Create and remove the file directly instead of shelling out with
    # os.system('touch ...') / os.system('rm ...'): this is portable
    # (works on Windows too) and raises instead of silently ignoring
    # failures.
    open('temp_config.txt', 'w').close()
    cursor = test_oplog.init_cursor()
    oplog_dict = test_oplog.oplog_progress.get_dict()
    self.assertEqual(cursor.count(), 1)
    self.assertTrue(str(test_oplog.oplog) in oplog_dict)
    self.assertTrue(
        oplog_dict[str(test_oplog.oplog)] == test_oplog.checkpoint)
    os.remove('temp_config.txt')

    # test init_cursor when OplogThread created with/without no-dump option
    # insert some documents (will need to be dumped)
    primary_conn['test']['test'].remove()
    primary_conn['test']['test'].insert(({"_id": i} for i in range(100)))

    # test no-dump option
    docman = test_oplog.doc_managers[0]
    docman._delete()
    test_oplog.collection_dump = False
    test_oplog.oplog_progress = LockingDict()
    # init_cursor has the side-effect of causing a collection dump
    test_oplog.init_cursor()
    self.assertEqual(len(docman._search()), 0)

    # test w/o no-dump option
    docman._delete()
    test_oplog.collection_dump = True
    test_oplog.oplog_progress = LockingDict()
    test_oplog.init_cursor()
    self.assertEqual(len(docman._search()), 100)
def test_upgrade_oplog_progress(self):
    """Test that update_checkpoint migrates an old-format progress dict.

    The old format keyed progress entries by str(oplog collection) with
    integer ("long") timestamps; the new format keys entries by replica
    set name with bson Timestamp values.
    """
    first_oplog_ts1 = self.opman1.oplog.find_one()['ts']
    first_oplog_ts2 = self.opman2.oplog.find_one()['ts']
    # Old format oplog progress file:
    progress = {
        str(self.opman1.oplog): bson_ts_to_long(first_oplog_ts1),
        str(self.opman2.oplog): bson_ts_to_long(first_oplog_ts2)
    }
    # Set up oplog managers to use the old format.
    oplog_progress = LockingDict()
    oplog_progress.dict = progress
    self.opman1.oplog_progress = oplog_progress
    self.opman2.oplog_progress = oplog_progress
    # Cause the oplog managers to update their checkpoints.
    self.opman1.update_checkpoint(first_oplog_ts1)
    self.opman2.update_checkpoint(first_oplog_ts2)
    # New format should be in place now.
    new_format = {
        self.opman1.replset_name: first_oplog_ts1,
        self.opman2.replset_name: first_oplog_ts2
    }
    self.assertEqual(
        new_format,
        self.opman1.oplog_progress.get_dict()
    )
    self.assertEqual(
        new_format,
        self.opman2.oplog_progress.get_dict()
    )
def get_oplog_thread(cls):
    """Set up connection with mongo.

    Returns oplog, the connection and oplog collection.
    This function clears the oplog.
    """
    is_sharded = True
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    # Fall back to the secondary if the first node is not the master.
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))
    primary_conn['test']['test'].drop()
    mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
    if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
        # MAIN is the primary itself, so there is no mongos: not sharded.
        # (The original rebuilt mongos_addr here with the identical
        # expression; that redundant reassignment was removed.)
        is_sharded = False
    oplog_coll = primary_conn['local']['oplog.rs']
    oplog_coll.drop()  # reset the oplog
    # Recreate the oplog as a capped collection, as MongoDB requires.
    primary_conn['local'].create_collection('oplog.rs', capped=True,
                                            size=1000000)
    namespace_set = ['test.test']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn, mongos_addr, oplog_coll, is_sharded,
                        doc_manager, LockingDict(), namespace_set,
                        cls.AUTH_KEY, AUTH_USERNAME, repl_set="demo-repl")
    return (oplog, primary_conn, oplog_coll)
def setUp(self):
    """Start a fresh replica set and an OplogThread watching test.mc."""
    # Create a new oplog progress file
    try:
        os.unlink("oplog.timestamp")
    except OSError:
        pass
    open("oplog.timestamp", "w").close()

    # Start a replica set
    self.repl_set = ReplicaSet().start()
    # Connection to the replica set as a whole
    self.main_conn = self.repl_set.client()
    # Connection to the primary specifically
    self.primary_conn = self.repl_set.primary.client()
    # Connection to the secondary specifically
    self.secondary_conn = self.repl_set.secondary.client(
        read_preference=ReadPreference.SECONDARY_PREFERRED)

    # Wipe any test data
    self.main_conn["test"]["mc"].drop()

    # Oplog thread
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman = OplogThread(primary_client=self.main_conn,
                             doc_managers=(doc_manager, ),
                             oplog_progress_dict=oplog_progress,
                             ns_set=["test.mc"])
def get_oplog_thread(cls):
    """ Set up connection with mongo. Returns oplog, the connection
    and oplog collection.

    This function clears the oplog.
    """
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    # Fall back to the secondary if the first node is not the master.
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

    mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
    mongos = Connection(mongos_addr)
    mongos['alpha']['foo'].drop()

    oplog_coll = primary_conn['local']['oplog.rs']
    oplog_coll.drop()  # reset the oplog
    # Recreate the oplog as a capped collection, as MongoDB requires.
    primary_conn['local'].create_collection('oplog.rs', capped=True,
                                            size=1000000)
    namespace_set = ['test.test', 'alpha.foo']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn, mongos_addr, oplog_coll, True,
                        doc_manager, LockingDict(), namespace_set,
                        cls.AUTH_KEY, AUTH_USERNAME)
    return (oplog, primary_conn, oplog_coll, mongos)
def get_new_oplog(cls):
    """Set up connection with mongo.

    Returns oplog, the connection and oplog collection.
    This function does not clear the oplog.
    """
    is_sharded = True
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    # Fall back to the secondary if the first node is not the master.
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))
    mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
    if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
        # MAIN is the primary itself, so there is no mongos: not sharded.
        # (The original rebuilt mongos_addr here with the identical
        # expression; that redundant reassignment was removed.)
        is_sharded = False
    oplog_coll = primary_conn['local']['oplog.rs']
    namespace_set = ['test.test']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn=primary_conn,
                        main_address=mongos_addr,
                        oplog_coll=oplog_coll,
                        is_sharded=is_sharded,
                        doc_manager=doc_manager,
                        oplog_progress_dict=LockingDict(),
                        namespace_set=namespace_set,
                        auth_key=cls.AUTH_KEY,
                        auth_username=AUTH_USERNAME,
                        repl_set="demo-repl")
    return (oplog, primary_conn, oplog.main_connection, oplog_coll)
def test_skipped_oplog_entry_updates_checkpoint(self):
    """Test that the checkpoint advances even for skipped oplog entries.

    The OplogThread should update its checkpoint on every oplog entry,
    including entries for namespaces outside the configured
    namespace set.
    """
    repl_set = ReplicaSetSingle().start()
    conn = repl_set.client()
    opman = OplogThread(
        primary_client=conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_set=["test.test"]),
    )
    opman.start()

    # Insert a document into an included collection
    conn["test"]["test"].insert_one({"test": 1})
    last_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: last_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to non-skipped "
        "entry.",
    )
    self.assertEqual(len(opman.doc_managers[0]._search()), 1)

    # Make sure that the oplog thread updates its checkpoint on every
    # oplog entry.
    conn["test"]["ignored"].insert_one({"test": 1})
    last_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: last_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to skipped entry.",
    )
    opman.join()
    conn.close()
    repl_set.stop()
def setUp(self):
    """Start a replica set and a bare OplogThread (no namespace config)."""
    self.repl_set = ReplicaSet().start()
    self.primary_conn = self.repl_set.client()
    # Handle to the replica set's oplog collection.
    self.oplog_coll = self.primary_conn.local['oplog.rs']
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(), ),
                             oplog_progress_dict=LockingDict())
def setUp(self):
    """Create an OplogThread with a default (catch-all) NamespaceConfig."""
    # NOTE(review): assumes self.primary_conn is provided by a base
    # class or an earlier fixture -- it is not created here; confirm.
    self.namespace_config = NamespaceConfig()
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(), ),
        oplog_progress_dict=LockingDict(),
        namespace_config=self.namespace_config,
    )
def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
    """Rebuild self.opman with a fresh NamespaceConfig.

    include_ns/exclude_ns/dest_mapping are forwarded to NamespaceConfig
    as namespace_set/ex_namespace_set/namespace_options respectively.
    """
    self.namespace_config = NamespaceConfig(namespace_set=include_ns,
                                            ex_namespace_set=exclude_ns,
                                            namespace_options=dest_mapping)
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(), ),
                             oplog_progress_dict=LockingDict(),
                             namespace_config=self.namespace_config)
def test_fields_and_exclude(self):
    """Test that fields and exclude_fields cannot be combined.

    Setting one while the other is set (via setattr or the
    constructor) must raise errors.InvalidConfiguration.
    """
    fields = ['a', 'b', 'c', '_id']
    exclude_fields = ['x', 'y', 'z']
    # Test setting both to None in constructor
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=None, exclude_fields=None)
    self._check_fields(opman, [], [], None)
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=None, exclude_fields=exclude_fields)
    self._check_fields(opman, [], exclude_fields,
                       dict((f, 0) for f in exclude_fields))
    # Test setting fields when exclude_fields is set
    self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                      "fields", fields)
    self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                      "fields", None)
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=None, fields=fields)
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                      "exclude_fields", exclude_fields)
    self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                      "exclude_fields", None)
    # Passing both in the constructor is also an error.
    self.assertRaises(errors.InvalidConfiguration, OplogThread,
                      self.primary_conn, (DocManager(), ), LockingDict(),
                      self.dest_mapping_stru, fields=fields,
                      exclude_fields=exclude_fields)
def setUp(self):
    """Start a replica set and an OplogThread with an empty DestMapping."""
    self.repl_set = ReplicaSetSingle().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_coll = self.primary_conn.local['oplog.rs']
    # Empty include/exclude sets and no renaming.
    self.dest_mapping_stru = DestMapping([], [], {})
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        dest_mapping_stru=self.dest_mapping_stru,
    )
def test_dump_collection(self): """Test the dump_collection method Cases: 1. empty oplog 2. non-empty oplog, with gridfs collections 3. non-empty oplog, specified a namespace-set, none of the oplog entries are for collections in the namespace-set """ # Test with empty oplog self.opman.oplog = self.primary_conn["test"]["emptycollection"] last_ts = self.opman.dump_collection() self.assertEqual(last_ts, None) # Test with non-empty oplog with gridfs collections self.opman.oplog = self.primary_conn["local"]["oplog.rs"] # Insert 10 gridfs files for i in range(10): fs = gridfs.GridFS(self.primary_conn["gridfs"], collection="test" + str(i)) fs.put(b"hello world") # Insert 1000 documents for i in range(1000): self.primary_conn["test"]["test"].insert_one({ "i": i + 500 }) last_ts = self.opman.get_last_oplog_timestamp() self.assertEqual(last_ts, self.opman.dump_collection()) self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010) # Case 3 # 1MB oplog so that we can rollover quickly repl_set = ReplicaSetSingle(oplogSize=1).start() conn = repl_set.client() opman = OplogThread( primary_client=conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), namespace_config=NamespaceConfig(namespace_set=["test.test"]), ) # Insert a document into an included collection conn["test"]["test"].insert_one({"test": 1}) # Cause the oplog to rollover on a non-included collection while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}): conn["test"]["ignored"].insert_many( [{"test": "1" * 1024} for _ in range(1024)]) last_ts = opman.get_last_oplog_timestamp() self.assertEqual(last_ts, opman.dump_collection()) self.assertEqual(len(opman.doc_managers[0]._search()), 1) conn.close() repl_set.stop()
def setUp(self):
    """Start a replica set and an OplogThread with wildcard namespaces."""
    self.repl_set = ReplicaSetSingle().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_coll = self.primary_conn.local["oplog.rs"]
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        # Include everything under "test"; mark "gridfs.*" as GridFS.
        namespace_config=NamespaceConfig(
            namespace_options={"test.*": True, "gridfs.*": {"gridfs": True}}
        ),
    )
def test_dump_collection(self): """Test the dump_collection method Cases: 1. empty oplog 2. non-empty oplog 3. non-empty oplog, specified a namespace-set, none of the oplog entries are for collections in the namespace-set """ # Test with empty oplog self.opman.oplog = self.primary_conn["test"]["emptycollection"] last_ts = self.opman.dump_collection() self.assertEqual(last_ts, None) # Test with non-empty oplog self.opman.oplog = self.primary_conn["local"]["oplog.rs"] for i in range(1000): self.primary_conn["test"]["test"].insert_one({ "i": i + 500 }) last_ts = self.opman.get_last_oplog_timestamp() self.assertEqual(last_ts, self.opman.dump_collection()) self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000) # Case 3 # 1MB oplog so that we can rollover quickly repl_set = ReplicaSetSingle(oplogSize=1).start() conn = repl_set.client() dest_mapping_stru = DestMapping(["test.test"], [], {}) opman = OplogThread( primary_client=conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=dest_mapping_stru, ns_set=set(["test.test"]) ) # Insert a document into a ns_set collection conn["test"]["test"].insert_one({"test": 1}) # Cause the oplog to rollover on a non-ns_set collection while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}): conn["test"]["ignored"].insert_many( [{"test": "1" * 1024} for _ in range(1024)]) last_ts = opman.get_last_oplog_timestamp() self.assertEqual(last_ts, opman.dump_collection()) self.assertEqual(len(opman.doc_managers[0]._search()), 1) conn.close() repl_set.stop()
def setUp(self):
    """Start a replica set and an unsharded OplogThread watching it."""
    _, _, self.primary_p = start_replica_set('test-oplog-manager')
    self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
    self.oplog_coll = self.primary_conn.local['oplog.rs']
    self.opman = OplogThread(primary_conn=self.primary_conn,
                             main_address='%s:%d' % (mongo_host,
                                                     self.primary_p),
                             oplog_coll=self.oplog_coll,
                             is_sharded=False,
                             doc_manager=DocManager(),
                             oplog_progress_dict=LockingDict(),
                             namespace_set=None,
                             auth_key=None,
                             auth_username=None,
                             repl_set='test-oplog-manager')
def get_new_oplog(cls):
    """ Set up connection with mongo. Returns oplog, the connection
    and oplog collection.

    This function does not clear the oplog.
    """
    primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
    # Fall back to the secondary if the first node is not the master.
    if primary_conn['admin'].command("isMaster")['ismaster'] is False:
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

    mongos = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
    oplog_coll = primary_conn['local']['oplog.rs']

    namespace_set = ['test.test', 'alpha.foo']
    doc_manager = DocManager()
    oplog = OplogThread(primary_conn, mongos, oplog_coll, True,
                        doc_manager, LockingDict(), namespace_set,
                        cls.AUTH_KEY, AUTH_USERNAME)
    return (oplog, primary_conn, oplog_coll, oplog.main_connection)
def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
    """Rebuild self.opman with a fresh DestMapping.

    None arguments default to empty containers.
    """
    if include_ns is None:
        include_ns = []
    if exclude_ns is None:
        exclude_ns = []
    if dest_mapping is None:
        dest_mapping = {}

    # include_ns must not exist together with exclude_ns
    # dest_mapping must exist together with include_ns
    # those checks have been tested in test_config.py so we skip that here.
    self.dest_mapping_stru = DestMapping(include_ns, exclude_ns,
                                         dest_mapping)
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(), ),
                             oplog_progress_dict=LockingDict(),
                             dest_mapping_stru=self.dest_mapping_stru,
                             ns_set=include_ns,
                             ex_ns_set=exclude_ns)
def setUp(self):
    """Start the 'rollbacks' replica set, open connections, and build
    an OplogThread watching test.mc."""
    # Create a new oplog progress file
    try:
        os.unlink("config.txt")
    except OSError:
        pass
    open("config.txt", "w").close()

    # Start a replica set
    _, self.secondary_p, self.primary_p = start_replica_set('rollbacks')
    # Connection to the replica set as a whole
    self.main_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p),
                                 replicaSet='rollbacks')
    # Connection to the primary specifically
    self.primary_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p))
    # Connection to the secondary specifically
    self.secondary_conn = MongoClient(
        '%s:%d' % (mongo_host, self.secondary_p),
        read_preference=ReadPreference.SECONDARY_PREFERRED
    )

    # Wipe any test data
    self.main_conn["test"]["mc"].drop()

    # Oplog thread
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman = OplogThread(
        primary_conn=self.main_conn,
        main_address='%s:%d' % (mongo_host, self.primary_p),
        oplog_coll=self.main_conn["local"]["oplog.rs"],
        is_sharded=False,
        doc_manager=doc_manager,
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mc"],
        auth_key=None,
        auth_username=None,
        repl_set="rollbacks"
    )
def setUp(self):
    """Start the 'demo-repl' replica set, open connections, and build
    an OplogThread watching test.mc."""
    # Create a new oplog progress file
    try:
        os.unlink("config.txt")
    except OSError:
        pass
    open("config.txt", "w").close()

    # Start a replica set
    start_cluster(sharded=False, use_mongos=False)
    # Connection to the replica set as a whole
    self.main_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"],
                                replicaSet="demo-repl")
    # Connection to the primary specifically
    self.primary_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"])
    # Connection to the secondary specifically
    self.secondary_conn = Connection(
        "localhost:%s" % PORTS_ONE["SECONDARY"],
        read_preference=ReadPreference.SECONDARY_PREFERRED)

    # Wipe any test data
    self.main_conn["test"]["mc"].drop()

    # Oplog thread
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman = OplogThread(
        primary_conn=self.main_conn,
        main_address="localhost:%s" % PORTS_ONE["PRIMARY"],
        oplog_coll=self.main_conn["local"]["oplog.rs"],
        is_sharded=False,
        doc_manager=doc_manager,
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mc"],
        auth_key=None,
        auth_username=None,
        repl_set="demo-repl")
def test_init_cursor(self): """Test the init_cursor method Cases: 1. no last checkpoint, no collection dump 2. no last checkpoint, collection dump ok and stuff to dump 3. no last checkpoint, nothing to dump, stuff in oplog 4. no last checkpoint, nothing to dump, nothing in oplog 5. no last checkpoint, no collection dump, stuff in oplog 6. last checkpoint exists 7. last checkpoint is behind """ # N.B. these sub-cases build off of each other and cannot be re-ordered # without side-effects # No last checkpoint, no collection dump, nothing in oplog # "change oplog collection" to put nothing in oplog self.opman.oplog = self.primary_conn["test"]["emptycollection"] self.opman.collection_dump = False self.assertTrue( all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0])) self.assertEqual(self.opman.checkpoint, None) # No last checkpoint, empty collections, nothing in oplog self.opman.collection_dump = True cursor, cursor_len = self.opman.init_cursor() self.assertEqual(cursor, None) self.assertEqual(cursor_len, 0) self.assertEqual(self.opman.checkpoint, None) # No last checkpoint, empty collections, something in oplog self.opman.oplog = self.primary_conn['local']['oplog.rs'] collection = self.primary_conn["test"]["test"] collection.insert_one({"i": 1}) collection.delete_one({"i": 1}) time.sleep(3) last_ts = self.opman.get_last_oplog_timestamp() cursor, cursor_len = self.opman.init_cursor() self.assertEqual(cursor_len, 0) self.assertEqual(self.opman.checkpoint, last_ts) with self.opman.oplog_progress as prog: self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts) # No last checkpoint, no collection dump, something in oplog self.opman.oplog_progress = LockingDict() self.opman.collection_dump = False collection.insert_one({"i": 2}) last_ts = self.opman.get_last_oplog_timestamp() cursor, cursor_len = self.opman.init_cursor() for i in range(cursor_len - 1): next(cursor) self.assertEqual(next(cursor)['o']['i'], 2) self.assertEqual(self.opman.checkpoint, last_ts) 
# Last checkpoint exists progress = LockingDict() self.opman.oplog_progress = progress for i in range(1000): collection.insert_one({"i": i + 500}) entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2)) progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"] self.opman.oplog_progress = progress self.opman.checkpoint = None cursor, cursor_len = self.opman.init_cursor() self.assertEqual(next(cursor)["ts"], entry[1]["ts"]) self.assertEqual(self.opman.checkpoint, entry[0]["ts"]) with self.opman.oplog_progress as prog: self.assertEqual(prog.get_dict()[str(self.opman.oplog)], entry[0]["ts"]) # Last checkpoint is behind progress = LockingDict() progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0) self.opman.oplog_progress = progress self.opman.checkpoint = None cursor, cursor_len = self.opman.init_cursor() self.assertEqual(cursor_len, 0) self.assertEqual(cursor, None) self.assertIsNotNone(self.opman.checkpoint)
def setUp(self):
    """Create an OplogThread with an empty DestMapping."""
    # NOTE(review): assumes self.primary_conn is provided by a base
    # class or an earlier fixture -- it is not created here; confirm.
    self.dest_mapping_stru = DestMapping([], [], {})
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(DocManager(), ),
                             oplog_progress_dict=LockingDict(),
                             dest_mapping_stru=self.dest_mapping_stru)
def test_fields_constructor(self):
    """Test the OplogThread fields constructor argument.

    Covers fields containing "_id", without "_id", only "_id",
    and not set at all.
    """
    # Test with "_id" field in constructor
    fields = ["_id", "title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=fields)
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    extra_fields = fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual(dict((f, 1) for f in fields), filtered)

    # Test without "_id" field in constructor
    fields = ["title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=fields)
    # "_id" is always included implicitly.
    fields.append('_id')
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    extra_fields = fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual(dict((f, 1) for f in fields), filtered)

    # Test with only "_id" field
    fields = ["_id"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        fields=fields)
    self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
    extra_fields = fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual({'_id': 1}, filtered)

    # Test with no fields set
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru)
    # With no projection at all, everything passes through unchanged.
    self._check_fields(opman, [], [], None)
    extra_fields = ['_id', 'extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual(dict((f, 1) for f in
                          extra_fields), filtered)
def test_exclude_fields_constructor(self):
    """Test the OplogThread exclude_fields constructor argument.

    Covers exclude_fields containing "_id", without "_id", only
    "_id", and not set at all.
    """
    # Test with the "_id" field in exclude_fields
    exclude_fields = ["_id", "title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=exclude_fields)
    # The "_id" field can never actually be excluded.
    exclude_fields.remove('_id')
    self._check_fields(opman, [], exclude_fields,
                       dict((f, 0) for f in exclude_fields))
    extra_fields = exclude_fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

    # Test without "_id" field included in exclude_fields
    exclude_fields = ["title", "content", "author"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=exclude_fields)
    self._check_fields(opman, [], exclude_fields,
                       dict((f, 0) for f in exclude_fields))
    # BUG FIX: build extra_fields from the current exclude_fields rather
    # than appending to the stale extra_fields list left over from the
    # previous sub-case (which duplicated 'extra1'/'extra2').
    extra_fields = exclude_fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

    # Test with only "_id" field in exclude_fields
    exclude_fields = ["_id"]
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=exclude_fields)
    # Excluding only "_id" is a no-op: no projection at all.
    self._check_fields(opman, [], [], None)
    extra_fields = exclude_fields + ['extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    # Test with nothing set for exclude_fields
    opman = OplogThread(primary_client=self.primary_conn,
                        doc_managers=(DocManager(), ),
                        oplog_progress_dict=LockingDict(),
                        dest_mapping_stru=self.dest_mapping_stru,
                        exclude_fields=None)
    self._check_fields(opman, [], [], None)
    extra_fields = ['_id', 'extra1', 'extra2']
    filtered = opman.filter_oplog_entry({
        'op': 'i',
        'o': dict((f, 1) for f in extra_fields)
    })['o']
    self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
def __init__(self, mongo_address, doc_managers=None, **kwargs): super(Connector, self).__init__() # can_run is set to false when we join the thread self.can_run = True # The signal that caused the connector to stop or None self.signal = None # main address - either mongos for sharded setups or a primary otherwise self.address = mongo_address # connection to the main address self.main_conn = None # List of DocManager instances if doc_managers: self.doc_managers = doc_managers else: LOG.warning('No doc managers specified, using simulator.') self.doc_managers = (simulator.DocManager(),) # Password for authentication self.auth_key = kwargs.pop('auth_key', None) # Username for authentication self.auth_username = kwargs.pop('auth_username', None) # The name of the file that stores the progress of the OplogThreads self.oplog_checkpoint = kwargs.pop('oplog_checkpoint', 'oplog.timestamp') # The set of OplogThreads created self.shard_set = {} # Dict of OplogThread/timestamp pairs to record progress self.oplog_progress = LockingDict() # Timezone awareness self.tz_aware = kwargs.get('tz_aware', False) # SSL keyword arguments to MongoClient. ssl_certfile = kwargs.pop('ssl_certfile', None) ssl_ca_certs = kwargs.pop('ssl_ca_certs', None) ssl_keyfile = kwargs.pop('ssl_keyfile', None) ssl_cert_reqs = kwargs.pop('ssl_cert_reqs', None) self.ssl_kwargs = {} if ssl_certfile: self.ssl_kwargs['ssl_certfile'] = ssl_certfile if ssl_ca_certs: self.ssl_kwargs['ssl_ca_certs'] = ssl_ca_certs if ssl_keyfile: self.ssl_kwargs['ssl_keyfile'] = ssl_keyfile if ssl_cert_reqs: self.ssl_kwargs['ssl_cert_reqs'] = ssl_cert_reqs # Save the rest of kwargs. 
self.kwargs = kwargs # Replace the origin dest_mapping self.dest_mapping = DestMapping(kwargs.get('ns_set', []), kwargs.get('ex_ns_set', []), kwargs.get('dest_mapping', {})) # Initialize and set the command helper command_helper = CommandHelper(self.dest_mapping) for dm in self.doc_managers: dm.command_helper = command_helper if self.oplog_checkpoint is not None: if not os.path.exists(self.oplog_checkpoint): info_str = ("MongoConnector: Can't find %s, " "attempting to create an empty progress log" % self.oplog_checkpoint) LOG.warning(info_str) try: # Create oplog progress file open(self.oplog_checkpoint, "w").close() except IOError as e: LOG.critical("MongoConnector: Could not " "create a progress log: %s" % str(e)) sys.exit(2) else: if (not os.access(self.oplog_checkpoint, os.W_OK) and not os.access(self.oplog_checkpoint, os.R_OK)): LOG.critical("Invalid permissions on %s! Exiting" % (self.oplog_checkpoint)) sys.exit(2)
def setUp(self):
    """Start a replica set; individual tests build their own OplogThread."""
    self.repl_set = ReplicaSet().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_progress = LockingDict()
    # Created lazily by each test.
    self.opman = None
def setUp(self):
    """ Initialize the cluster:

    Clean out the databases used by the tests
    Make connections to mongos, mongods
    Create and shard test collections
    Create OplogThreads
    """
    self.cluster = ShardedCluster().start()

    # Connection to mongos
    self.mongos_conn = self.cluster.client()

    # Connections to the shards
    self.shard1_conn = self.cluster.shards[0].client()
    self.shard2_conn = self.cluster.shards[1].client()
    self.shard1_secondary_conn = self.cluster.shards[0].secondary.client(
        readPreference=ReadPreference.SECONDARY_PREFERRED)
    self.shard2_secondary_conn = self.cluster.shards[1].secondary.client(
        readPreference=ReadPreference.SECONDARY_PREFERRED)

    # Wipe any test data
    self.mongos_conn["test"]["mcsharded"].drop()

    # Create and shard the collection test.mcsharded on the "i" field
    self.mongos_conn["test"]["mcsharded"].ensure_index("i")
    self.mongos_conn.admin.command("enableSharding", "test")
    self.mongos_conn.admin.command("shardCollection",
                                   "test.mcsharded",
                                   key={"i": 1})

    # Pre-split the collection so that:
    # i < 1000 lives on shard1
    # i >= 1000 lives on shard2
    self.mongos_conn.admin.command(
        bson.SON([("split", "test.mcsharded"),
                  ("middle", {"i": 1000})]))

    # disable the balancer
    self.mongos_conn.config.settings.update({"_id": "balancer"},
                                            {"$set": {"stopped": True}},
                                            upsert=True)

    # Move chunks to their proper places
    try:
        self.mongos_conn["admin"].command("moveChunk",
                                          "test.mcsharded",
                                          find={"i": 1},
                                          to='demo-set-0')
    except pymongo.errors.OperationFailure:
        # The chunk may already live on the right shard.
        pass
    try:
        self.mongos_conn["admin"].command("moveChunk",
                                          "test.mcsharded",
                                          find={"i": 1000},
                                          to='demo-set-1')
    except pymongo.errors.OperationFailure:
        pass

    # Make sure chunks are distributed correctly
    self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
    self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

    def chunks_moved():
        # Each shard must hold its expected sentinel document.
        doc1 = self.shard1_conn.test.mcsharded.find_one()
        doc2 = self.shard2_conn.test.mcsharded.find_one()
        if None in (doc1, doc2):
            return False
        return doc1['i'] == 1 and doc2['i'] == 1000
    assert_soon(chunks_moved, max_tries=120,
                message='chunks not moved? doc1=%r, doc2=%r'
                % (self.shard1_conn.test.mcsharded.find_one(),
                   self.shard2_conn.test.mcsharded.find_one()))
    self.mongos_conn.test.mcsharded.remove()

    # create a new oplog progress file
    try:
        os.unlink("oplog.timestamp")
    except OSError:
        pass
    open("oplog.timestamp", "w").close()

    # Oplog threads (oplog manager) for each shard
    doc_manager = DocManager()
    oplog_progress = LockingDict()
    self.opman1 = OplogThread(
        primary_client=self.shard1_conn,
        doc_managers=(doc_manager, ),
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mcsharded", "test.mcunsharded"],
        mongos_client=self.mongos_conn)
    self.opman2 = OplogThread(
        primary_client=self.shard2_conn,
        doc_managers=(doc_manager, ),
        oplog_progress_dict=oplog_progress,
        namespace_set=["test.mcsharded", "test.mcunsharded"],
        mongos_client=self.mongos_conn)
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:

    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. no last checkpoint, no collection dump, stuff in oplog
    6. last checkpoint exists
    7. last checkpoint is behind
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
    self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
    self.opman1.collection_dump = False
    self.opman2.collection_dump = False
    # With an empty oplog and no dump, init_cursor's cursor should yield
    # only no-op entries (op == 'n') and leave the checkpoint unset.
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman1.init_cursor()[0]))
    self.assertEqual(self.opman1.checkpoint, None)
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman2.init_cursor()[0]))
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    # init_cursor returns a (cursor, length) pair; with nothing to do it
    # returns (None, 0).
    self.opman1.collection_dump = self.opman2.collection_dump = True
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman1.checkpoint, None)
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor, None)
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman2.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    # Point back at the real oplogs, then generate insert+remove entries so
    # the oplog has content but the collection stays empty.
    self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
    self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
    oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
    collection = self.mongos_conn["test"]["mcsharded"]
    collection.insert({"i": 1})
    collection.remove({"i": 1})
    # NOTE(review): sleep presumably lets the oplog entries replicate/settle
    # before reading timestamps -- timing-sensitive.
    time.sleep(3)
    last_ts1 = self.opman1.get_last_oplog_timestamp()
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor_len, 0)
    # Checkpoint should now be the latest oplog timestamp, recorded in the
    # shared progress dict under this oplog's key.
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    # init_cursor should point to startup message in shard2 oplog
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

    # No last checkpoint, no collection dump, stuff in oplog
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.collection_dump = self.opman2.collection_dump = False
    # i == 1200 lands on shard2 (>= 1000 per the pre-split in setUp).
    collection.insert({"i": 1200})
    last_ts2 = self.opman2.get_last_oplog_timestamp()
    self.opman1.init_cursor()
    # shard1 saw no new writes, so its checkpoint stays at last_ts1.
    self.assertEqual(self.opman1.checkpoint, last_ts1)
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         last_ts1)
    cursor, cursor_len = self.opman2.init_cursor()
    # Skip to the final entry in the cursor; it must be the {"i": 1200}
    # insert.
    for i in range(cursor_len - 1):
        next(cursor)
    self.assertEqual(next(cursor)["o"]["i"], 1200)
    self.assertEqual(self.opman2.checkpoint, last_ts2)
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         last_ts2)

    # Last checkpoint exists
    progress = LockingDict()
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    for i in range(1000):
        collection.insert({"i": i + 500})
    # Grab two consecutive mid-oplog entries per shard; seed the progress
    # dict with the first entry's ts as the stored checkpoint.
    entry1 = list(self.shard1_conn["local"]["oplog.rs"].find(skip=200,
                                                             limit=2))
    entry2 = list(self.shard2_conn["local"]["oplog.rs"].find(skip=200,
                                                             limit=2))
    progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
    progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.checkpoint = self.opman2.checkpoint = None
    cursor1, cursor_len1 = self.opman1.init_cursor()
    cursor2, cursor_len2 = self.opman2.init_cursor()
    # The cursor should resume immediately AFTER the stored checkpoint,
    # i.e. at the second of the two captured entries.
    self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
    self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
    self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
    self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
    with self.opman1.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                         entry1[0]["ts"])
    with self.opman2.oplog_progress as prog:
        self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                         entry2[0]["ts"])

    # Last checkpoint is behind
    # Timestamp(1, 0) predates everything in the oplog; init_cursor should
    # return no cursor but still end up with a (re-set) checkpoint.
    progress = LockingDict()
    progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
    progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
    self.opman1.oplog_progress = self.opman2.oplog_progress = progress
    self.opman1.checkpoint = self.opman2.checkpoint = None
    cursor, cursor_len = self.opman1.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman1.checkpoint)
    cursor, cursor_len = self.opman2.init_cursor()
    self.assertEqual(cursor_len, 0)
    self.assertEqual(cursor, None)
    self.assertIsNotNone(self.opman2.checkpoint)
def setUp(self): """ Initialize the cluster: Clean out the databases used by the tests Make connections to mongos, mongods Create and shard test collections Create OplogThreads """ # Start the cluster with a mongos on port 27217 self.mongos_p = start_cluster() # Connection to mongos mongos_address = '%s:%d' % (mongo_host, self.mongos_p) self.mongos_conn = MongoClient(mongos_address) # Connections to the shards shard1_ports = get_shard(self.mongos_p, 0) shard2_ports = get_shard(self.mongos_p, 1) self.shard1_prim_p = shard1_ports['primary'] self.shard1_scnd_p = shard1_ports['secondaries'][0] self.shard2_prim_p = shard2_ports['primary'] self.shard2_scnd_p = shard2_ports['secondaries'][0] self.shard1_conn = MongoClient('%s:%d' % (mongo_host, self.shard1_prim_p), replicaSet="demo-set-0") self.shard2_conn = MongoClient('%s:%d' % (mongo_host, self.shard2_prim_p), replicaSet="demo-set-1") self.shard1_secondary_conn = MongoClient( '%s:%d' % (mongo_host, self.shard1_scnd_p), read_preference=ReadPreference.SECONDARY_PREFERRED ) self.shard2_secondary_conn = MongoClient( '%s:%d' % (mongo_host, self.shard2_scnd_p), read_preference=ReadPreference.SECONDARY_PREFERRED ) # Wipe any test data self.mongos_conn["test"]["mcsharded"].drop() # Create and shard the collection test.mcsharded on the "i" field self.mongos_conn["test"]["mcsharded"].ensure_index("i") self.mongos_conn.admin.command("enableSharding", "test") self.mongos_conn.admin.command("shardCollection", "test.mcsharded", key={"i": 1}) # Pre-split the collection so that: # i < 1000 lives on shard1 # i >= 1000 lives on shard2 self.mongos_conn.admin.command(bson.SON([ ("split", "test.mcsharded"), ("middle", {"i": 1000}) ])) # disable the balancer self.mongos_conn.config.settings.update( {"_id": "balancer"}, {"$set": {"stopped": True}}, upsert=True ) # Move chunks to their proper places try: self.mongos_conn["admin"].command( "moveChunk", "test.mcsharded", find={"i": 1}, to="demo-set-0" ) except pymongo.errors.OperationFailure: 
pass # chunk may already be on the correct shard try: self.mongos_conn["admin"].command( "moveChunk", "test.mcsharded", find={"i": 1000}, to="demo-set-1" ) except pymongo.errors.OperationFailure: pass # chunk may already be on the correct shard # Make sure chunks are distributed correctly self.mongos_conn["test"]["mcsharded"].insert({"i": 1}) self.mongos_conn["test"]["mcsharded"].insert({"i": 1000}) def chunks_moved(): doc1 = self.shard1_conn.test.mcsharded.find_one() doc2 = self.shard2_conn.test.mcsharded.find_one() if None in (doc1, doc2): return False return doc1['i'] == 1 and doc2['i'] == 1000 assert_soon(chunks_moved) self.mongos_conn.test.mcsharded.remove() # create a new oplog progress file try: os.unlink("config.txt") except OSError: pass open("config.txt", "w").close() # Oplog threads (oplog manager) for each shard doc_manager = DocManager() oplog_progress = LockingDict() self.opman1 = OplogThread( primary_conn=self.shard1_conn, main_address='%s:%d' % (mongo_host, self.mongos_p), oplog_coll=self.shard1_conn["local"]["oplog.rs"], is_sharded=True, doc_manager=doc_manager, oplog_progress_dict=oplog_progress, namespace_set=["test.mcsharded", "test.mcunsharded"], auth_key=None, auth_username=None ) self.opman2 = OplogThread( primary_conn=self.shard2_conn, main_address='%s:%d' % (mongo_host, self.mongos_p), oplog_coll=self.shard2_conn["local"]["oplog.rs"], is_sharded=True, doc_manager=doc_manager, oplog_progress_dict=oplog_progress, namespace_set=["test.mcsharded", "test.mcunsharded"], auth_key=None, auth_username=None )
def test_init_cursor(self):
    """Test the init_cursor method

    Cases:

    1. no last checkpoint, no collection dump
    2. no last checkpoint, collection dump ok and stuff to dump
    3. no last checkpoint, nothing to dump, stuff in oplog
    4. no last checkpoint, nothing to dump, nothing in oplog
    5. no last checkpoint, no collection dump, stuff in oplog
    6. last checkpoint exists
    7. last checkpoint is behind
    """
    # N.B. these sub-cases build off of each other and cannot be re-ordered
    # without side-effects
    self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

    # No last checkpoint, no collection dump, nothing in oplog
    # "change oplog collection" to put nothing in oplog
    self.opman.oplog = self.primary_conn["includedb1"]["emptycollection"]
    self.opman.collection_dump = False
    # With an empty oplog and no dump, init_cursor's cursor should yield
    # only no-op entries (op == 'n') and leave the checkpoint unset.
    self.assertTrue(
        all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, nothing in oplog
    # Here init_cursor returns a (cursor, cursor_empty) pair; with nothing
    # to do it returns (None, True).
    self.opman.collection_dump = True
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertEqual(cursor, None)
    self.assertTrue(cursor_empty)
    self.assertEqual(self.opman.checkpoint, None)

    # No last checkpoint, empty collections, something in oplog
    # Point back at the real oplog, then generate insert+delete entries so
    # the oplog has content but the collection stays empty.
    self.opman.oplog = self.primary_conn['local']['oplog.rs']
    collection = self.primary_conn["includedb1"]["includecol1"]
    collection.insert_one({"idb1col1": 1})
    collection.delete_one({"idb1col1": 1})
    # NOTE(review): sleep presumably lets the oplog entries settle before
    # reading timestamps -- timing-sensitive.
    time.sleep(3)
    last_ts = self.opman.get_last_oplog_timestamp()
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertFalse(cursor_empty)
    # Checkpoint is set to the latest oplog ts and persisted (readable via
    # read_last_checkpoint).
    self.assertEqual(self.opman.checkpoint, last_ts)
    self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

    # No last checkpoint, no collection dump, something in oplog
    # If collection dump is false the checkpoint should not be set
    self.opman.checkpoint = None
    self.opman.oplog_progress = LockingDict()
    self.opman.collection_dump = False
    collection.insert_one({"idb1col1": 2})
    cursor, cursor_empty = self.opman.init_cursor()
    # Drain the cursor; the final entry must be the {"idb1col1": 2} insert.
    for doc in cursor:
        last_doc = doc
    self.assertEqual(last_doc['o']['idb1col1'], 2)
    self.assertIsNone(self.opman.checkpoint)

    # Last checkpoint exists
    collection.insert_many([{"idb1col1": i + 500} for i in range(1000)])
    # limit=-2: negative limit in pymongo means "2 results, single batch".
    # Store the first entry's ts as the checkpoint; resumption should begin
    # at the second entry.
    entry = list(
        self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
    self.opman.update_checkpoint(entry[0]["ts"])
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
    self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
    self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

    # Last checkpoint is behind
    # Timestamp(1, 0) predates everything in the oplog; init_cursor should
    # return (None, True) and leave the stale checkpoint in place.
    self.opman.update_checkpoint(bson.Timestamp(1, 0))
    cursor, cursor_empty = self.opman.init_cursor()
    self.assertTrue(cursor_empty)
    self.assertEqual(cursor, None)
    self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))