def test_skipped_oplog_entry_updates_checkpoint(self):
    """Check that the checkpoint also advances past skipped oplog entries.

    The OplogThread is created with ``test.test`` as its only namespace.
    A write to ``test.test`` must be replicated to the doc manager and
    move the checkpoint to the last oplog timestamp; a write to
    ``test.ignored`` must move the checkpoint as well.
    """
    repl_set = ReplicaSetSingle().start()
    # Register every cleanup as soon as its resource exists so that a
    # failing assertion cannot leak the thread, the client or the replica
    # set.  Cleanups run in reverse registration order, i.e.
    # join -> close -> stop, the same sequence the test used to run inline.
    self.addCleanup(repl_set.stop)
    conn = repl_set.client()
    self.addCleanup(conn.close)
    opman = OplogThread(
        primary_client=conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_set=["test.test"]),
    )
    opman.start()
    self.addCleanup(opman.join)

    # Insert a document into an included collection
    conn["test"]["test"].insert_one({"test": 1})
    included_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: included_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to non-skipped "
        "entry.",
    )
    self.assertEqual(len(opman.doc_managers[0]._search()), 1)

    # Make sure that the oplog thread updates its checkpoint on every
    # oplog entry.
    conn["test"]["ignored"].insert_one({"test": 1})
    skipped_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: skipped_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to skipped entry.",
    )
class TestOplogManagerSharded(unittest.TestCase):
    """Defines all test cases for OplogThreads running on a sharded
    cluster

    Every test gets a fresh ShardedCluster with the collection
    ``test.mcsharded`` sharded on ``i`` (split at 1000) and one OplogThread
    per shard; both threads share a single DocManager.
    """

    def setUp(self):
        """ Initialize the cluster:

        Clean out the databases used by the tests
        Make connections to mongos, mongods
        Create and shard test collections
        Create OplogThreads
        """
        self.cluster = ShardedCluster().start()

        # Connection to mongos
        self.mongos_conn = self.cluster.client()

        # Connections to the shards
        self.shard1_conn = self.cluster.shards[0].client()
        self.shard2_conn = self.cluster.shards[1].client()
        self.shard1_secondary_conn = self.cluster.shards[0].secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED)
        self.shard2_secondary_conn = self.cluster.shards[1].secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].create_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        self.mongos_conn.admin.command(bson.SON([
            ("split", "test.mcsharded"),
            ("middle", {"i": 1000})
        ]))

        # disable the balancer
        self.mongos_conn.config.settings.update_one(
            {"_id": "balancer"},
            {"$set": {"stopped": True}},
            upsert=True
        )

        # Move chunks to their proper places
        # (a failing moveChunk is deliberately ignored here; the placement
        # is verified by chunks_moved() below)
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to='demo-set-0'
            )
        except pymongo.errors.OperationFailure:
            pass
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1000},
                to='demo-set-1'
            )
        except pymongo.errors.OperationFailure:
            pass

        # Make sure chunks are distributed correctly
        self.mongos_conn["test"]["mcsharded"].insert_one({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert_one({"i": 1000})

        def chunks_moved():
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000
        # NOTE: the message is formatted before assert_soon is called, so
        # it shows the documents as they were at that moment.
        assert_soon(chunks_moved, max_tries=120,
                    message='chunks not moved? doc1=%r, doc2=%r' % (
                        self.shard1_conn.test.mcsharded.find_one(),
                        self.shard2_conn.test.mcsharded.find_one()))
        self.mongos_conn.test.mcsharded.delete_many({})

        # create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Oplog threads (oplog manager) for each shard
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(doc_manager,),
            oplog_progress_dict=oplog_progress,
            ns_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn
        )
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(doc_manager,),
            oplog_progress_dict=oplog_progress,
            ns_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn
        )

    def tearDown(self):
        """Join the oplog threads, close all clients and stop the cluster."""
        try:
            for opman in (self.opman1, self.opman2):
                try:
                    opman.join()
                except RuntimeError:
                    pass  # thread may not have been started
            close_client(self.mongos_conn)
            close_client(self.shard1_conn)
            close_client(self.shard2_conn)
            close_client(self.shard1_secondary_conn)
            close_client(self.shard2_secondary_conn)
        finally:
            # Stop the cluster even if one of the steps above raised, so a
            # failed cleanup cannot leave the cluster running.
            self.cluster.stop()

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # timestamp = None
        cursor1 = self.opman1.get_oplog_cursor(None)
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.system')}})
        self.assertEqual(list(cursor1), list(oplog1))

        cursor2 = self.opman2.get_oplog_cursor(None)
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.system')}})
        self.assertEqual(list(cursor2), list(oplog2))

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert_one(doc)
        latest_timestamp = self.opman1.get_last_oplog_timestamp()
        cursor = self.opman1.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = cursor[0]['o']['_id']
        retrieved = self.mongos_conn.test.mcsharded.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        for i in range(2, 2002):
            self.mongos_conn["test"]["mcsharded"].insert_one({
                "i": i
            })
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )

        # oplogs should have records for inserts performed, plus
        # various other messages
        oplog1_count = oplog1.count()
        oplog2_count = oplog2.count()
        self.assertGreaterEqual(oplog1_count, 998)
        self.assertGreaterEqual(oplog2_count, 1002)
        pivot1 = oplog1.skip(400).limit(-1)[0]
        pivot2 = oplog2.skip(400).limit(-1)[0]

        cursor1 = self.opman1.get_oplog_cursor(pivot1["ts"])
        cursor2 = self.opman2.get_oplog_cursor(pivot2["ts"])
        self.assertEqual(cursor1.count(), oplog1_count - 400)
        self.assertEqual(cursor2.count(), oplog2_count - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(), None)
        self.assertEqual(self.opman2.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert_one({
                "i": i + 500
            })
        oplog1 = self.shard1_conn["local"]["oplog.rs"]
        oplog1 = oplog1.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        oplog2 = self.shard2_conn["local"]["oplog.rs"]
        oplog2 = oplog2.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(),
                         oplog1["ts"])
        self.assertEqual(self.opman2.get_last_oplog_timestamp(),
                         oplog2["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        last_ts1 = self.opman1.dump_collection()
        last_ts2 = self.opman2.dump_collection()
        self.assertEqual(last_ts1, None)
        self.assertEqual(last_ts2, None)

        # Test with non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert_one({
                "i": i + 500
            })
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(last_ts1, self.opman1.dump_collection())
        self.assertEqual(last_ts2, self.opman2.dump_collection())
        self.assertEqual(len(self.opman1.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman1.init_cursor()[0]))
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman2.init_cursor()[0]))
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = self.opman2.collection_dump = True

        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, None)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, no collection dump, stuff in oplog
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.collection_dump = self.opman2.collection_dump = False
        collection.insert_one({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.opman1.init_cursor()
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             last_ts1)
        cursor, cursor_len = self.opman2.init_cursor()
        # Skip ahead so that the next entry is the last one in the cursor
        for _ in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 1200)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        entry1 = list(
            self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        entry2 = list(
            self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1, _ = self.opman1.init_cursor()
        cursor2, _ = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
        progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman1.checkpoint)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman2.checkpoint)

    def test_rollback(self):
        """Test the rollback method in a sharded environment

        Cases:
        1. Documents on both shards, rollback on one shard
        2. Documents on both shards, rollback on both shards

        """

        self.opman1.start()
        self.opman2.start()

        # Insert first documents while primaries are up
        db_main = self.mongos_conn["test"]["mcsharded"]
        db_main2 = db_main.with_options(write_concern=WriteConcern(w=2))
        db_main2.insert_one({"i": 0})
        db_main2.insert_one({"i": 1000})
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].count(), 1)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].count(), 1)

        # Case 1: only one primary goes down, shard1 in this case
        self.cluster.shards[0].primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        def cond():
            try:
                db_main.insert_one({"i": 1})
            except Exception:
                # Best effort: the insert may fail while the shard is
                # failing over; success is judged by the find_one below.
                # Exception (not a bare except) so that KeyboardInterrupt
                # and SystemExit are not swallowed.
                pass
            return db_main.find_one({"i": 1})
        retry_until_ok(cond)

        db_secondary1 = self.shard1_secondary_conn["test"]["mcsharded"]
        db_secondary2 = self.shard2_secondary_conn["test"]["mcsharded"]
        self.assertEqual(db_secondary1.count(), 2)

        # Wait for replication on the doc manager
        # Note that both OplogThreads share the same doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 3
        assert_soon(c, "not all writes were replicated to doc manager",
                    max_tries=120)

        # Kill the new primary
        self.cluster.shards[0].secondary.stop(destroy=False)

        # Start both servers back up
        self.cluster.shards[0].primary.start()
        primary_admin = self.shard1_conn["admin"]
        c = lambda: primary_admin.command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[0].secondary.start()
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        query = {"i": {"$lt": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)

        # Only first document should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)

        def check_docman_rollback():
            docman_docs = [d for d in self.opman1.doc_managers[0]._search()
                           if d["i"] < 1000]
            return len(docman_docs) == 1 and docman_docs[0]["i"] == 0
        assert_soon(check_docman_rollback, "doc manager did not roll back")

        # Wait for previous rollback to complete.
        # Insert/delete one document to jump-start replication to secondaries
        # in MongoDB 3.x.
        db_main.insert_one({'i': -1})
        db_main.delete_one({'i': -1})

        def rollback_done():
            secondary1_count = retry_until_ok(db_secondary1.count)
            secondary2_count = retry_until_ok(db_secondary2.count)
            return (1, 1) == (secondary1_count, secondary2_count)
        assert_soon(rollback_done,
                    "rollback never replicated to one or more secondaries")

        ##############################

        # Case 2: Primaries on both shards go down
        self.cluster.shards[0].primary.stop(destroy=False)
        self.cluster.shards[1].primary.stop(destroy=False)

        # Wait for the secondaries to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        shard2_secondary_admin = self.shard2_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])
        assert_soon(
            lambda: shard2_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document on each shard. These will be rolled back
        # later
        retry_until_ok(db_main.insert_one, {"i": 1})
        self.assertEqual(db_secondary1.count(), 2)
        retry_until_ok(db_main.insert_one, {"i": 1001})
        self.assertEqual(db_secondary2.count(), 2)

        # Wait for replication on the doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 4
        assert_soon(c, "not all writes were replicated to doc manager")

        # Kill the new primaries
        self.cluster.shards[0].secondary.stop(destroy=False)
        self.cluster.shards[1].secondary.stop(destroy=False)

        # Start the servers back up...
        # Shard 1
        self.cluster.shards[0].primary.start()
        c = lambda: self.shard1_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[0].secondary.start()
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        # Shard 2
        self.cluster.shards[1].primary.start()
        c = lambda: self.shard2_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[1].secondary.start()
        secondary_admin = self.shard2_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)

        # Wait for the shards to come online
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)
        query2 = {"i": {"$gte": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query2).count) > 0)

        # Only first documents should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)
        self.assertEqual(db_main.find(query2).count(), 1)
        self.assertEqual(db_main.find_one(query2)["i"], 1000)

        # Same should hold for the doc manager
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 2)
        i_values = [d["i"] for d in self.opman1.doc_managers[0]._search()]
        self.assertIn(0, i_values)
        self.assertIn(1000, i_values)

    def test_with_chunk_migration(self):
        """Test that DocManagers have proper state after both a successful
        and an unsuccessful chunk migration
        """

        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Test successful chunk move from shard 1 to shard 2
        self.mongos_conn["admin"].command(
            "moveChunk",
            "test.mcsharded",
            find={"i": 1},
            to="demo-set-1"
        )

        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

        # Mark the collection as "dropped". This will cause migration to fail.
        self.mongos_conn["config"]["collections"].update_one(
            {"_id": "test.mcsharded"},
            {"$set": {"dropped": True}}
        )

        # Test unsuccessful chunk move from shard 2 to shard 1
        def fail_to_move_chunk():
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to="demo-set-0"
            )
        self.assertRaises(pymongo.errors.OperationFailure, fail_to_move_chunk)
        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

    def test_with_orphan_documents(self):
        """Test that DocManagers have proper state after a chunk migration
        that results in orphaned documents.
        """
        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert_many([{"i": i + 500} for i in range(1000)])
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Move a chunk from shard2 to shard1
        def move_chunk():
            try:
                self.mongos_conn["admin"].command(
                    "moveChunk",
                    "test.mcsharded",
                    find={"i": 1000},
                    to="demo-set-0"
                )
            except pymongo.errors.OperationFailure:
                pass

        # moveChunk will never complete, so use another thread to continue
        mover = threading.Thread(target=move_chunk)

        # Stop replication using the 'rsSyncApplyStop' failpoint
        self.shard1_conn.admin.command(
            "configureFailPoint", "rsSyncApplyStop",
            mode="alwaysOn"
        )
        mover.start()
        try:
            # wait for documents to start moving to shard 1
            assert_soon(lambda: self.shard1_conn.test.mcsharded.count() > 500)

            # Get opid for moveChunk command
            operations = self.mongos_conn.test.current_op()
            opid = None
            for op in operations["inprog"]:
                if op.get("query", {}).get("moveChunk"):
                    opid = op["opid"]
            if opid is None:
                raise SkipTest("could not find moveChunk operation, cannot test "
                               "failed moveChunk")
            # Kill moveChunk with the opid
            if self.mongos_conn.server_info()['versionArray'][:3] >= [3, 1, 2]:
                self.mongos_conn.admin.command('killOp', op=opid)
            else:
                self.mongos_conn["test"]["$cmd.sys.killop"].find_one(
                    {"op": opid})

            # Mongo Connector should not become confused by unsuccessful
            # chunk move
            docs = self.opman1.doc_managers[0]._search()
            self.assertEqual(len(docs), 1000)
            self.assertEqual(sorted(d["i"] for d in docs),
                             list(range(500, 1500)))
        finally:
            # Switch the failpoint off and reap the mover thread on every
            # exit path (success, skip or failed assertion), not only when
            # all assertions passed.
            self.shard1_conn.admin.command(
                "configureFailPoint", "rsSyncApplyStop",
                mode="off"
            )

            # cleanup
            mover.join()
class TestFilterFields(unittest.TestCase):
    """Tests for the include/exclude field filtering done by OplogThread.

    A single ReplicaSetSingle is shared by all tests of the class; every
    test gets a fresh OplogThread with a default NamespaceConfig.
    """

    @classmethod
    def setUpClass(cls):
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        self.namespace_config = NamespaceConfig()
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(DocManager(), ),
                                 oplog_progress_dict=LockingDict(),
                                 namespace_config=self.namespace_config)

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        """Replace the thread's NamespaceConfig with one including *fields*."""
        self.opman.namespace_config = NamespaceConfig(include_fields=fields)

    def reset_exclude_fields(self, fields):
        """Replace the thread's NamespaceConfig with one excluding *fields*."""
        self.opman.namespace_config = NamespaceConfig(exclude_fields=fields)

    def test_filter_fields(self):
        """Dumped documents contain the included fields and no others."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.reset_include_fields(include_fields)
        # Documents have more than just these fields
        doc = {
            "a": 1, "b": 2, "c": 3,
            "d": 4, "e": 5, "f": 6,
            "_id": 1
        }
        db = conn['test']['test']
        db.insert_one(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        """Check filter_oplog_entry with exclude_fields on inserts/updates."""
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {
                "op": "i",
                "o": {
                    "_id": 0,
                    "a": 1,
                    "b": 2,
                    "c": 3
                }
            }

        def update_op():
            return {
                "op": "u",
                "o": {
                    "$set": {
                        "a": 4,
                        "b": 5
                    },
                    "$unset": {
                        "c": True
                    }
                },
                "o2": {
                    "_id": 1
                }
            }

        def filter_doc(document, fields):
            # _id is never excluded; build a new list rather than mutating
            # the caller's.
            if fields:
                fields = [field for field in fields if field != '_id']
            return self.opman.filter_oplog_entry(
                document, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['c'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['a', 'b', 'c'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['b'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['a', 'b', 'c'])
        self.assertEqual(filtered, None)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['d', 'e', 'f'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_filter_oplog_entry(self):
        """Check filter_oplog_entry with include_fields on inserts/updates."""
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {
                "op": "i",
                "o": {
                    "_id": 0,
                    "a": 1,
                    "b": 2,
                    "c": 3
                }
            }

        def update_op():
            return {
                "op": "u",
                "o": {
                    "$set": {
                        "a": 4,
                        "b": 5
                    },
                    "$unset": {
                        "c": True
                    }
                },
                "o2": {
                    "_id": 1
                }
            }

        def filter_doc(document, fields):
            # _id is always included; build a new list rather than mutating
            # the caller's.
            if fields and '_id' not in fields:
                fields = fields + ['_id']
            return self.opman.filter_oplog_entry(
                document, include_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['a', 'b'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['d', 'e', 'f'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['a', 'c'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['d', 'e', 'f'])
        self.assertEqual(filtered, None)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['a', 'b', 'c'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_nested_fields(self):
        """Check include_fields filtering with dotted (nested) field names."""
        # Local import: the file's import block is not visible from here.
        import copy

        def check_nested(document, fields, filtered_document, op='i'):
            # _id is always included; do not mutate the caller's list.
            if '_id' not in fields:
                fields = fields + ['_id']
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, include_fields=fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        # Where the filter is expected to leave a document untouched, the
        # expected value is a deep copy: a shallow copy would share its
        # nested dicts with the document handed to filter_oplog_entry, so
        # an unexpected in-place change there would go unnoticed.

        document = {'name': 'Han Solo', 'a': {'b': {}}}
        fields = ['name', 'a.b.c']
        filtered_document = {'name': 'Han Solo'}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a.b', '-a']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a', '-a.-b']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)
        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
                    '_id': 1}

        fields = ['a.b', '-a']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)
        fields = ['a', '-a.-b']
        check_nested(document, fields, filtered_document)

        document = {'test': 1}
        fields = ['doesnt_exist']
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        fields = ['a.b', 'b.a']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        fields = ['a.b']
        filtered_document = {'a': {'b': {'a': {'b': 1}}}}
        check_nested(document, fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        fields = ['name']
        filtered_document = {'name': 'anna'}
        check_nested(document, fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        fields = ['a', 'c']
        filtered_update = {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'c': 3}, 'a.e': 1}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        fields = ['a.b']
        filtered_update = {'$set': {'a.b.1': 1, 'a.b.2': 2}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        fields = ['a.b.e']
        filtered_update = None
        check_nested(update, fields, filtered_update, op='u')

    def test_nested_exclude_fields(self):
        """Check exclude_fields filtering with dotted (nested) field names."""
        def check_nested(document, exclude_fields, filtered_document, op='i'):
            # _id is never excluded; do not mutate the caller's list.
            exclude_fields = [
                field for field in exclude_fields if field != '_id']
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, exclude_fields=exclude_fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'a': {'b': {'c': {'d': 0, 'e': 1}}}}
        exclude_fields = ['a.b.c.d']
        filtered_document = {'a': {'b': {'c': {'e': 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}}
        exclude_fields = ['a.b.c.d.e.f']
        filtered_document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': 1}
        exclude_fields = ['a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2,
                             'c': {'g': 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        exclude_fields = ['a.b.c', 'a.e', '_id']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2, 'c': {'g': 1},
                             '_id': 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a', '-a.-b']
        filtered_document = {'-a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'_id': 1, 'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['test.doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        exclude_fields = ['a.b', 'b.a']
        filtered_document = {'a': {}, 'b': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        exclude_fields = ['a.b']
        filtered_document = {'a': {}, 'c': {'a': {'b': 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        exclude_fields = ['name']
        filtered_document = {'name_of_cat': 'pushkin'}
        check_nested(document, exclude_fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        exclude_fields = ['a', 'c']
        filtered_update = {'$set': {'b': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'d': 1}, 'a.f': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b.c', 'a.b.d', 'a.e']
        filtered_update = {'$set': {'a.b': {}}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        exclude_fields = ['a.b']
        filtered_update = {'$set': {'b': 3}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}}
        exclude_fields = ['a.b', 'd']
        filtered_update = {'$set': {'g': 456}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b', 'a.e']
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op='u')
class TestFilterFields(unittest.TestCase):
    """Field-filtering tests for OplogThread.

    Filters are supplied either through a NamespaceConfig (collection dump)
    or through the ``include_fields``/``exclude_fields`` arguments of
    ``OplogThread.filter_oplog_entry``.
    """

    @classmethod
    def setUpClass(cls):
        """Start one single-node replica set shared by all tests."""
        replica_set = ReplicaSetSingle().start()
        cls.repl_set = replica_set
        cls.primary_conn = replica_set.client()
        cls.oplog_coll = cls.primary_conn.local["oplog.rs"]

    @classmethod
    def tearDownClass(cls):
        """Drop the test database, close the client, stop the replica set."""
        client = cls.primary_conn
        client.drop_database("test")
        close_client(client)
        cls.repl_set.stop()

    def setUp(self):
        """Create a fresh, not-yet-started OplogThread for each test."""
        config = NamespaceConfig()
        self.namespace_config = config
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=config,
        )

    def tearDown(self):
        """Join the OplogThread, tolerating a thread that never ran."""
        try:
            self.opman.join()
        except RuntimeError:
            # Raised when the thread was never started; nothing to join.
            pass

    def reset_include_fields(self, fields):
        """Replace the thread's namespace config with an include-only one."""
        include_config = NamespaceConfig(include_fields=fields)
        self.opman.namespace_config = include_config

    def reset_exclude_fields(self, fields):
        """Replace the thread's namespace config with an exclude-only one."""
        exclude_config = NamespaceConfig(exclude_fields=fields)
        self.opman.namespace_config = exclude_config

    def test_filter_fields(self):
        """A collection dump only carries the included fields."""
        doc_manager = self.opman.doc_managers[0]
        client = self.opman.primary_client
        wanted = ["a", "b", "c"]
        unwanted = ["d", "e", "f"]

        # Restrict replication to the wanted fields.
        self.reset_include_fields(wanted)

        # The stored document carries wanted and unwanted fields alike.
        coll = client["test"]["test"]
        coll.insert_one(
            {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        )
        assert_soon(lambda: coll.count() == 1)
        self.opman.dump_collection()

        replicated_keys = doc_manager._search()[0].keys()
        for kept, dropped in zip(wanted, unwanted):
            self.assertIn(kept, replicated_keys)
            self.assertNotIn(dropped, replicated_keys)

    def test_filter_exclude_oplog_entry(self):
        """filter_oplog_entry with exclude_fields on insert/update entries."""

        # filter_oplog_entry edits the entry in place, so every case gets a
        # freshly built entry from one of these factories.
        def make_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def make_update():
            modifiers = {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}}
            return {"op": "u", "o": modifiers, "o2": {"_id": 1}}

        def run_filter(entry, fields):
            # "_id" is never passed on as an excluded field.
            if fields and "_id" in fields:
                fields.remove("_id")
            return self.opman.filter_oplog_entry(entry, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        result = run_filter(make_insert(), None)
        self.assertEqual(result, make_insert())

        # Case 1: insert op, fields provided
        result = run_filter(make_insert(), ["c"])
        self.assertEqual(result["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(make_insert(), ["a", "b", "c"])
        self.assertEqual(result["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        result = run_filter(make_update(), None)
        self.assertEqual(result, make_update())

        # Case 4: update op, fields provided
        result = run_filter(make_update(), ["b"])
        set_part = result["o"]["$set"]
        self.assertNotIn("b", set_part)
        self.assertIn("a", set_part)
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        result = run_filter(make_update(), ["a", "b"])
        self.assertNotIn("$set", result["o"])
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(make_update(), ["c"])
        self.assertNotIn("$unset", result["o"])
        self.assertEqual(result["o"]["$set"], make_update()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        result = run_filter(make_update(), ["a", "b", "c"])
        self.assertEqual(result, None)

        # Case 8: update op, fields provided, replacement
        replacement = {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}
        result = run_filter(replacement, ["d", "e", "f"])
        self.assertEqual(result, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_filter_oplog_entry(self):
        """filter_oplog_entry with include_fields on insert/update entries."""

        # filter_oplog_entry edits the entry in place, so every case gets a
        # freshly built entry from one of these factories.
        def make_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def make_update():
            modifiers = {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}}
            return {"op": "u", "o": modifiers, "o2": {"_id": 1}}

        def run_filter(entry, fields):
            # "_id" is always part of a non-empty include list.
            if fields and "_id" not in fields:
                fields.append("_id")
            return self.opman.filter_oplog_entry(entry, include_fields=fields)

        # Case 0: insert op, no fields provided
        result = run_filter(make_insert(), None)
        self.assertEqual(result, make_insert())

        # Case 1: insert op, fields provided
        result = run_filter(make_insert(), ["a", "b"])
        self.assertEqual(result["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(make_insert(), ["d", "e", "f"])
        self.assertEqual(result["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        result = run_filter(make_update(), None)
        self.assertEqual(result, make_update())

        # Case 4: update op, fields provided
        result = run_filter(make_update(), ["a", "c"])
        set_part = result["o"]["$set"]
        self.assertNotIn("b", set_part)
        self.assertIn("a", set_part)
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        result = run_filter(make_update(), ["c"])
        self.assertNotIn("$set", result["o"])
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(make_update(), ["a", "b"])
        self.assertNotIn("$unset", result["o"])
        self.assertEqual(result["o"]["$set"], make_update()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        result = run_filter(make_update(), ["d", "e", "f"])
        self.assertEqual(result, None)

        # Case 8: update op, fields provided, replacement
        replacement = {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}
        result = run_filter(replacement, ["a", "b", "c"])
        self.assertEqual(result, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_nested_fields(self):
        """Dotted include paths applied to nested documents and updates."""

        def expect(doc, fields, expected, op="i"):
            # "_id" is always part of the include list.
            if "_id" not in fields:
                fields.append("_id")
            entry = self.opman.filter_oplog_entry(
                {"op": op, "o": doc}, include_fields=fields
            )
            actual = None if entry is None else entry["o"]
            self.assertEqual(actual, expected)

        # Factories hand out a brand-new document on every call because the
        # filter works in place.
        def wide_doc(with_id=False):
            doc = {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}}
            if with_id:
                doc["_id"] = 1
            return doc

        def dashed_doc(with_id=False):
            doc = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
            if with_id:
                doc["_id"] = 1
            return doc

        expect(
            {"name": "Han Solo", "a": {"b": {}}},
            ["name", "a.b.c"],
            {"name": "Han Solo"},
        )

        expect(wide_doc(), ["a.b.c", "a.e"], {"a": {"b": {"c": 2}, "e": 5}})

        expect(
            wide_doc(with_id=True),
            ["a.b.c", "a.e"],
            {"a": {"b": {"c": 2}, "e": 5}, "_id": 1},
        )

        doc = dashed_doc()
        expect(doc, ["a.b", "-a"], doc.copy())

        doc = dashed_doc()
        expect(doc, ["a", "-a.-b"], doc.copy())

        # The same document and expectation are checked with two field lists.
        doc = dashed_doc(with_id=True)
        unchanged = doc.copy()
        expect(doc, ["a.b", "-a"], unchanged)
        expect(doc, ["a", "-a.-b"], unchanged)

        expect({"test": 1}, ["doesnt_exist"], {})

        doc = {"a": {"b": 1}, "b": {"a": 1}}
        expect(doc, ["a.b", "b.a"], doc.copy())

        expect(
            {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}},
            ["a.b"],
            {"a": {"b": {"a": {"b": 1}}}},
        )

        expect(
            {"name": "anna", "name_of_cat": "pushkin"},
            ["name"],
            {"name": "anna"},
        )

        # Update entries: keys of "$set" are dotted paths themselves.
        expect(
            {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}},
            ["a", "c"],
            {"$set": {"a.b": 1, "a.c": 3, "c": {"b": 3}}},
            op="u",
        )

        expect(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}},
            ["a.b.c", "a.e"],
            {"$set": {"a.b": {"c": 3}, "a.e": 1}},
            op="u",
        )

        expect(
            {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}},
            ["a.b"],
            {"$set": {"a.b.1": 1, "a.b.2": 2}},
            op="u",
        )

        expect(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}},
            ["a.b.e"],
            None,
            op="u",
        )

    def test_nested_exclude_fields(self):
        """Dotted exclude paths applied to nested documents and updates."""

        def expect(doc, exclude_fields, expected, op="i"):
            # "_id" is never passed on as an excluded field.
            if "_id" in exclude_fields:
                exclude_fields.remove("_id")
            entry = self.opman.filter_oplog_entry(
                {"op": op, "o": doc}, exclude_fields=exclude_fields
            )
            actual = None if entry is None else entry["o"]
            self.assertEqual(actual, expected)

        # Factories hand out a brand-new document on every call because the
        # filter works in place.
        def wide_doc(with_id=False):
            doc = {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}}
            if with_id:
                doc["_id"] = 1
            return doc

        def dashed_doc(with_id=False):
            doc = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
            if with_id:
                doc["_id"] = 1
            return doc

        expect(
            {"a": {"b": {"c": {"d": 0, "e": 1}}}},
            ["a.b.c.d"],
            {"a": {"b": {"c": {"e": 1}}}},
        )

        expect(
            {"a": {"b": {"c": {"-a": 0, "d": {"e": {"f": 1}}}}}},
            ["a.b.c.d.e.f"],
            {"a": {"b": {"c": {"-a": 0, "d": {"e": {}}}}}},
        )

        expect({"a": 1}, ["a"], {})

        expect(
            wide_doc(),
            ["a.b.c", "a.e"],
            {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}},
        )

        expect(
            wide_doc(with_id=True),
            ["a.b.c", "a.e", "_id"],
            {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}, "_id": 1},
        )

        expect(dashed_doc(), ["a.b", "-a"], {"a": {}})

        expect(dashed_doc(), ["a", "-a.-b"], {"-a": {}})

        expect(dashed_doc(with_id=True), ["a.b", "-a"], {"_id": 1, "a": {}})

        doc = {"test": 1}
        expect(doc, ["doesnt_exist"], doc.copy())

        doc = {"test": 1}
        expect(doc, ["test.doesnt_exist"], doc.copy())

        expect(
            {"a": {"b": 1}, "b": {"a": 1}},
            ["a.b", "b.a"],
            {"a": {}, "b": {}},
        )

        expect(
            {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}},
            ["a.b"],
            {"a": {}, "c": {"a": {"b": 1}}},
        )

        expect(
            {"name": "anna", "name_of_cat": "pushkin"},
            ["name"],
            {"name_of_cat": "pushkin"},
        )

        # Update entries: keys of "$set" are dotted paths themselves.
        expect(
            {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}},
            ["a", "c"],
            {"$set": {"b": 2}},
            op="u",
        )

        expect(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}},
            ["a.b.c", "a.e"],
            {"$set": {"a.b": {"d": 1}, "a.f": 2}},
            op="u",
        )

        expect(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}},
            ["a.b.c", "a.b.d", "a.e"],
            {"$set": {"a.b": {}}},
            op="u",
        )

        expect(
            {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}},
            ["a.b"],
            {"$set": {"b": 3}},
            op="u",
        )

        expect(
            {"$set": {"a.b.c": 42, "d.e.f": 123, "g": 456}},
            ["a.b", "d"],
            {"$set": {"g": 456}},
            op="u",
        )

        expect(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}},
            ["a.b", "a.e"],
            None,
            op="u",
        )
class TestFilterFields(unittest.TestCase):
    """Field-filtering tests for OplogThread.

    Filters are configured through the ``fields`` / ``exclude_fields``
    constructor arguments and attributes of OplogThread; the tests check the
    public attributes, the private ``_fields`` / ``_exclude_fields`` sets,
    the ``_projection`` value and the output of ``filter_oplog_entry``.
    """

    @classmethod
    def setUpClass(cls):
        """Start one single-node replica set shared by all tests."""
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        """Drop the test database, close the client, stop the replica set."""
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        """Create a fresh, not-yet-started OplogThread without filters."""
        self.dest_mapping_stru = DestMapping([], [], {})
        self.opman = self._make_opman()

    def tearDown(self):
        """Join the OplogThread, tolerating a thread that never ran."""
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def _make_opman(self, **field_options):
        """Build an OplogThread on the shared primary connection.

        ``field_options`` is forwarded untouched, so callers decide whether
        ``fields`` / ``exclude_fields`` are passed at all (and whether they
        are passed as an explicit ``None``).
        """
        return OplogThread(primary_client=self.primary_conn,
                           doc_managers=(DocManager(), ),
                           oplog_progress_dict=LockingDict(),
                           dest_mapping_stru=self.dest_mapping_stru,
                           **field_options)

    def _filter_insert(self, opman, keys):
        """Filter an insert entry whose document maps each key to 1.

        Returns the filtered document (the ``'o'`` part of the entry).
        """
        entry = {'op': 'i', 'o': dict.fromkeys(keys, 1)}
        return opman.filter_oplog_entry(entry)['o']

    def _check_fields(self, opman, fields, exclude_fields, projection):
        """Assert the include/exclude state and projection of ``opman``.

        An empty ``fields`` / ``exclude_fields`` means the corresponding
        public attribute must be None and the private set must be empty.
        """
        if fields:
            self.assertEqual(sorted(opman.fields), sorted(fields))
            self.assertEqual(opman._fields, set(fields))
        else:
            self.assertIsNone(opman.fields)
            self.assertEqual(opman._fields, set())
        if exclude_fields:
            self.assertEqual(sorted(opman.exclude_fields),
                             sorted(exclude_fields))
            self.assertEqual(opman._exclude_fields, set(exclude_fields))
        else:
            self.assertIsNone(opman.exclude_fields)
            self.assertEqual(opman._exclude_fields, set())
        self.assertEqual(opman._projection, projection)

    def test_filter_fields(self):
        """A collection dump only carries the included fields."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        db = conn['test']['test']
        db.insert_one(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        """filter_oplog_entry honours the ``exclude_fields`` attribute."""

        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        # Case 0: insert op, no fields provided
        self.opman.exclude_fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert op, fields provided
        self.opman.exclude_fields = ['c']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.exclude_fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0})
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 3: update op, no fields provided
        self.opman.exclude_fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update op, fields provided
        self.opman.exclude_fields = ['b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'b': 0}, self.opman._projection)

        # Case 5: update op, fields provided, empty $set
        self.opman.exclude_fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'a': 0, 'b': 0}, self.opman._projection)

        # Case 6: update op, fields provided, empty $unset
        self.opman.exclude_fields = ['c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 7: update op, fields provided, entry is nullified
        self.opman.exclude_fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertIsNone(filtered)
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 8: update op, fields provided, replacement
        self.opman.exclude_fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        })
        self.assertEqual(filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'d': 0, 'e': 0, 'f': 0}, self.opman._projection)

    def test_filter_oplog_entry(self):
        """filter_oplog_entry honours the ``fields`` attribute."""

        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        # Case 0: insert op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert op, fields provided
        self.opman.fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection)

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0})
        self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1},
                         self.opman._projection)

        # Case 3: update op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update op, fields provided
        self.opman.fields = ['a', 'c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'_id': 1, 'a': 1, 'c': 1}, self.opman._projection)

        # Case 5: update op, fields provided, empty $set
        self.opman.fields = ['c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'_id': 1, 'c': 1}, self.opman._projection)

        # Case 6: update op, fields provided, empty $unset
        self.opman.fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection)

        # Case 7: update op, fields provided, entry is nullified
        self.opman.fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertIsNone(filtered)
        self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1},
                         self.opman._projection)

        # Case 8: update op, fields provided, replacement
        self.opman.fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        })
        self.assertEqual(filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1, 'c': 1},
                         self.opman._projection)

    def test_exclude_fields_constructor(self):
        """``exclude_fields`` passed to the OplogThread constructor."""
        extras = ['extra1', 'extra2']

        # Test with the "_id" field in exclude_fields
        exclude_fields = ["_id", "title", "content", "author"]
        opman = self._make_opman(exclude_fields=exclude_fields)
        # "_id" is expected to be ignored, so drop it from the expectation.
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        filtered = self._filter_insert(opman, exclude_fields + extras)
        self.assertEqual(dict.fromkeys(extras, 1), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = self._make_opman(exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Build the document from this sub-case's own exclude list rather
        # than from whatever the previous sub-case left behind.
        filtered = self._filter_insert(opman, exclude_fields + extras)
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = self._make_opman(exclude_fields=exclude_fields)
        self._check_fields(opman, [], [], None)
        all_fields = exclude_fields + extras
        filtered = self._filter_insert(opman, all_fields)
        self.assertEqual(dict.fromkeys(all_fields, 1), filtered)

        # Test with nothing set for exclude_fields
        opman = self._make_opman(exclude_fields=None)
        self._check_fields(opman, [], [], None)
        all_fields = ['_id', 'extra1', 'extra2']
        filtered = self._filter_insert(opman, all_fields)
        self.assertEqual(dict.fromkeys(all_fields, 1), filtered)

    def test_fields_constructor(self):
        """``fields`` passed to the OplogThread constructor."""
        extras = ['extra1', 'extra2']

        # Test with "_id" field in constructor
        fields = ["_id", "title", "content", "author"]
        opman = self._make_opman(fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        filtered = self._filter_insert(opman, fields + extras)
        self.assertEqual(dict.fromkeys(fields, 1), filtered)

        # Test without "_id" field in constructor
        fields = ["title", "content", "author"]
        opman = self._make_opman(fields=fields)
        # "_id" is expected to be included implicitly.
        fields.append('_id')
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        filtered = self._filter_insert(opman, fields + extras)
        self.assertEqual(dict.fromkeys(fields, 1), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        opman = self._make_opman(fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        filtered = self._filter_insert(opman, fields + extras)
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        opman = self._make_opman()
        self._check_fields(opman, [], [], None)
        all_fields = ['_id', 'extra1', 'extra2']
        filtered = self._filter_insert(opman, all_fields)
        self.assertEqual(dict.fromkeys(all_fields, 1), filtered)

    def test_exclude_fields_attr(self):
        """``exclude_fields`` assigned through the attribute."""
        extras = ['extra1', 'extra2']

        # Start from a list containing "_id", but strip it *before* the
        # assignment, so the attribute itself never sees "_id" here.
        # NOTE(review): the constructor test strips "_id" after construction
        # instead; confirm whether this sub-case was meant to do the same.
        exclude_fields = ["_id", "title", "content", "author"]
        exclude_fields.remove('_id')
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        filtered = self._filter_insert(self.opman, exclude_fields + extras)
        self.assertEqual(dict.fromkeys(extras, 1), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Build the document from this sub-case's own exclude list rather
        # than from whatever the previous sub-case left behind.
        filtered = self._filter_insert(self.opman, exclude_fields + extras)
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], [], None)
        all_fields = exclude_fields + extras
        filtered = self._filter_insert(self.opman, all_fields)
        self.assertEqual(dict.fromkeys(all_fields, 1), filtered)

        # Test with nothing set for exclude_fields
        self.opman.exclude_fields = None
        self._check_fields(self.opman, [], [], None)
        all_fields = ['_id', 'extra1', 'extra2']
        filtered = self._filter_insert(self.opman, all_fields)
        self.assertEqual(dict.fromkeys(all_fields, 1), filtered)

    def test_fields_attr(self):
        """``fields`` assigned through the attribute."""
        extras = ['extra1', 'extra2']

        # Test with "_id" field included in fields
        fields = ["_id", "title", "content", "author"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [], dict.fromkeys(fields, 1))
        filtered = self._filter_insert(self.opman, fields + extras)
        self.assertEqual(dict.fromkeys(fields, 1), filtered)

        # Test without "_id" field included in fields
        fields = ["title", "content", "author"]
        self.opman.fields = fields
        # "_id" is expected to be included implicitly.
        fields.append('_id')
        self._check_fields(self.opman, fields, [], dict.fromkeys(fields, 1))
        filtered = self._filter_insert(self.opman, fields + extras)
        self.assertEqual(dict.fromkeys(fields, 1), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [], dict.fromkeys(fields, 1))
        filtered = self._filter_insert(self.opman, fields + extras)
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        self.opman.fields = None
        self._check_fields(self.opman, [], [], None)
        all_fields = ['_id', 'extra1', 'extra2']
        filtered = self._filter_insert(self.opman, all_fields)
        self.assertEqual(dict.fromkeys(all_fields, 1), filtered)

    def test_nested_fields(self):
        """Dotted include paths applied to nested insert documents."""

        def check_nested(document, fields, filtered_document):
            self.opman.fields = fields
            # "_id" is expected to be included implicitly.
            fields.append('_id')
            self.assertEqual(set(fields), self.opman._fields)
            self.assertEqual(sorted(fields), sorted(self.opman.fields))
            filtered_result = self.opman.filter_oplog_entry({
                'op': 'i',
                'o': document
            })['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'name': 'Han Solo', 'a': {'b': {}}}
        fields = ['name', 'a.b.c']
        filtered_document = {'name': 'Han Solo'}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a', '-a.-b']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        # The same document and expectation are checked with two field lists.
        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        fields = ['a', '-a.-b']
        check_nested(document, fields, filtered_document)

        document = {'test': 1}
        fields = ['doesnt_exist']
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        fields = ['a.b', 'b.a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        fields = ['a.b']
        filtered_document = {'a': {'b': {'a': {'b': 1}}}}
        check_nested(document, fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        fields = ['name']
        filtered_document = {'name': 'anna'}
        check_nested(document, fields, filtered_document)

    def test_nested_exclude_fields(self):
        """Dotted exclude paths applied to nested insert documents."""

        def check_nested(document, exclude_fields, filtered_document):
            self.opman.exclude_fields = exclude_fields
            # "_id" is expected to be ignored, so drop it from the
            # expectation after the assignment.
            if '_id' in exclude_fields:
                exclude_fields.remove('_id')
            self.assertEqual(set(exclude_fields), self.opman._exclude_fields)
            self.assertEqual(sorted(exclude_fields),
                             sorted(self.opman.exclude_fields))
            filtered_result = self.opman.filter_oplog_entry({
                'op': 'i',
                'o': document
            })['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'a': {'b': {'c': {'d': 0, 'e': 1}}}}
        exclude_fields = ['a.b.c.d']
        filtered_document = {'a': {'b': {'c': {'e': 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}}
        exclude_fields = ['a.b.c.d.e.f']
        filtered_document = {'a': {'b': {'c': {'-a': 0}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': 1}
        exclude_fields = ['a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        exclude_fields = ['a.b.c', 'a.e', '_id']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2,
                             'c': {'g': 1},
                             '_id': 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a.b', '-a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a', '-a.-b']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'_id': 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['test.doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        exclude_fields = ['a.b', 'b.a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        exclude_fields = ['a.b']
        filtered_document = {'c': {'a': {'b': 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        exclude_fields = ['name']
        filtered_document = {'name_of_cat': 'pushkin'}
        check_nested(document, exclude_fields, filtered_document)

    def test_fields_and_exclude(self):
        """``fields`` and ``exclude_fields`` are mutually exclusive."""
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        # Test setting both to None in constructor
        opman = self._make_opman(fields=None, exclude_fields=None)
        self._check_fields(opman, [], [], None)

        opman = self._make_opman(fields=None, exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Test setting fields when exclude_fields is set
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", None)

        opman = self._make_opman(exclude_fields=None, fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        # Test setting exclude_fields when fields is set
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", exclude_fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", None)

        # Passing both to the constructor is rejected as well.
        self.assertRaises(errors.InvalidConfiguration,
                          OplogThread,
                          self.primary_conn, (DocManager(), ),
                          LockingDict(),
                          self.dest_mapping_stru,
                          fields=fields,
                          exclude_fields=exclude_fields)
class TestRollbacks(unittest.TestCase):
    """Rollback tests for an OplogThread reading from a replica set.

    Every test follows the same script: write while the primary is up,
    stop the primary, write (or delete) through the promoted secondary,
    stop that node too, then restart the original primary first and
    compare the doc manager(s) with what MongoDB holds afterwards.
    """

    def tearDown(self):
        """Close every client opened in setUp, then stop the replica set."""
        # main_conn is closed together with the two member connections so
        # that none of the three clients created in setUp outlives the test.
        for client in (self.primary_conn, self.secondary_conn,
                       self.main_conn):
            close_client(client)
        self.repl_set.stop()

    def setUp(self):
        """Start a replica set, open the clients and build the OplogThread.

        The OplogThread is created but not started; each test starts it.
        """
        # Create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Start a replica set
        self.repl_set = ReplicaSet().start()
        # Connection to the replica set as a whole
        self.main_conn = self.repl_set.client()
        # Connection to the primary specifically
        self.primary_conn = self.repl_set.primary.client()
        # Connection to the secondary specifically
        self.secondary_conn = self.repl_set.secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        self.opman = OplogThread(
            primary_client=self.main_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            ns_set=["test.mc"]
        )

    def test_single_target(self):
        """Test with a single replication target"""
        self.opman.start()
        coll = self.main_conn["test"]["mc"]

        # Insert first document with primary up
        coll.insert_one({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 1)

        # Make sure the insert is replicated
        secondary = self.secondary_conn
        assert_soon(lambda: secondary["test"]["mc"].count() == 1,
                    "first write didn't replicate to secondary")

        # Kill the primary
        self.repl_set.primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        assert_soon(lambda: secondary["admin"].command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        retry_until_ok(coll.insert_one, {"i": 1})
        self.assertEqual(secondary["test"]["mc"].count(), 2)

        # Wait for replication to doc manager
        assert_soon(lambda: len(self.opman.doc_managers[0]._search()) == 2,
                    "not all writes were replicated to doc manager")

        # Kill the new primary
        self.repl_set.secondary.stop(destroy=False)

        # Start both servers back up
        self.repl_set.primary.start()
        primary_admin = self.primary_conn["admin"]
        assert_soon(lambda: primary_admin.command("isMaster")["ismaster"],
                    "restarted primary never resumed primary status")
        self.repl_set.secondary.start()
        assert_soon(
            lambda: retry_until_ok(secondary.admin.command,
                                   'replSetGetStatus')['myState'] == 2,
            "restarted secondary never resumed secondary status")
        assert_soon(
            lambda: retry_until_ok(self.main_conn.test.mc.find().count) > 0,
            "documents not found after primary/secondary restarted")

        # Only first document should exist in MongoDB
        self.assertEqual(coll.count(), 1)
        self.assertEqual(coll.find_one()["i"], 0)

        # Same case should hold for the doc manager
        doc_manager = self.opman.doc_managers[0]
        assert_soon(lambda: len(doc_manager._search()) == 1,
                    'documents never rolled back in doc manager.')
        self.assertEqual(doc_manager._search()[0]["i"], 0)

        # cleanup
        self.opman.join()

    def test_many_targets(self):
        """Test with several replication targets"""
        # OplogThread has multiple doc managers
        self.opman.doc_managers = [DocManager(), DocManager(), DocManager()]

        self.opman.start()

        # Insert a document into each namespace
        self.main_conn["test"]["mc"].insert_one({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)

        # Make sure the insert is replicated
        secondary = self.secondary_conn
        assert_soon(lambda: secondary["test"]["mc"].count() == 1,
                    "first write didn't replicate to secondary")

        # Kill the primary
        self.repl_set.primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        assert_soon(lambda: secondary.admin.command("isMaster")['ismaster'],
                    'secondary was never promoted')

        # Insert more documents. This will be rolled back later
        # Some of these documents will be manually removed from
        # certain doc managers, to emulate the effect of certain
        # target systems being ahead/behind others
        secondary_ids = [
            retry_until_ok(self.main_conn["test"]["mc"].insert_one,
                           {"i": i}).inserted_id
            for i in range(1, 10)
        ]
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 10)

        # Wait for replication to the doc managers
        def docmans_done():
            return all(len(dm._search()) == 10
                       for dm in self.opman.doc_managers)

        assert_soon(docmans_done,
                    "not all writes were replicated to doc managers")

        # Remove some documents from the doc managers to simulate
        # uneven replication
        ts = self.opman.doc_managers[0].get_last_doc()['_ts']
        for doc_id in secondary_ids[8:]:
            self.opman.doc_managers[1].remove(doc_id, 'test.mc', ts)
        for doc_id in secondary_ids[2:]:
            self.opman.doc_managers[2].remove(doc_id, 'test.mc', ts)

        # Kill the new primary
        self.repl_set.secondary.stop(destroy=False)

        # Start both servers back up
        self.repl_set.primary.start()
        primary_admin = self.primary_conn["admin"]
        assert_soon(lambda: primary_admin.command("isMaster")['ismaster'],
                    'restarted primary never resumed primary status')
        self.repl_set.secondary.start()
        assert_soon(
            lambda: retry_until_ok(secondary.admin.command,
                                   'replSetGetStatus')['myState'] == 2,
            "restarted secondary never resumed secondary status")
        assert_soon(
            lambda: retry_until_ok(self.primary_conn.test.mc.find().count) > 0,
            "documents not found after primary/secondary restarted")

        # Only first document should exist in MongoDB
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.primary_conn["test"]["mc"].find_one()["i"], 0)

        # Give OplogThread some time to catch up
        time.sleep(10)

        # Same case should hold for the doc managers
        for dm in self.opman.doc_managers:
            self.assertEqual(len(dm._search()), 1)
            self.assertEqual(dm._search()[0]["i"], 0)

        self.opman.join()

    def test_deletions(self):
        """Test rolling back 'd' operations"""
        self.opman.start()

        # Insert a document, wait till it replicates to secondary
        self.main_conn["test"]["mc"].insert_one({"i": 0})
        self.main_conn["test"]["mc"].insert_one({"i": 1})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 2)
        assert_soon(lambda: self.secondary_conn["test"]["mc"].count() == 2,
                    "first write didn't replicate to secondary")

        # Kill the primary, wait for secondary to be promoted
        self.repl_set.primary.stop(destroy=False)
        assert_soon(
            lambda: self.secondary_conn["admin"].command(
                "isMaster")["ismaster"])

        # Delete first document
        retry_until_ok(self.main_conn["test"]["mc"].delete_one, {"i": 0})
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 1)

        # Wait for replication to doc manager
        assert_soon(lambda: len(self.opman.doc_managers[0]._search()) == 1,
                    "delete was not replicated to doc manager")

        # Kill the new primary
        self.repl_set.secondary.stop(destroy=False)

        # Start both servers back up
        self.repl_set.primary.start()
        primary_admin = self.primary_conn["admin"]
        assert_soon(lambda: primary_admin.command("isMaster")["ismaster"],
                    "restarted primary never resumed primary status")
        self.repl_set.secondary.start()
        assert_soon(
            lambda: retry_until_ok(self.secondary_conn.admin.command,
                                   'replSetGetStatus')['myState'] == 2,
            "restarted secondary never resumed secondary status")

        # Both documents should exist in mongo
        assert_soon(
            lambda: retry_until_ok(self.main_conn["test"]["mc"].count) == 2)

        # Both document should exist in doc manager
        doc_manager = self.opman.doc_managers[0]
        # NOTE: the message is formatted before the wait starts, so it
        # shows the doc manager contents at call time, not at timeout.
        assert_soon(lambda: len(list(doc_manager._search())) == 2,
                    ("Expected two documents, but got: %r"
                     % list(doc_manager._search())))

        self.opman.join()

    def test_stressed_rollback(self):
        """Stress test for a rollback with many documents."""
        self.opman.start()

        c = self.main_conn.test.mc
        docman = self.opman.doc_managers[0]

        def docman_has_all():
            return len(docman._search()) == STRESS_COUNT

        c2 = c.with_options(write_concern=WriteConcern(w=2))
        c2.insert_many([{'i': i} for i in range(STRESS_COUNT)])
        assert_soon(lambda: c2.count() == STRESS_COUNT)
        # NOTE: the count in the message is sampled before the wait starts.
        assert_soon(docman_has_all,
                    ("Was expecting %d documents in DocManager, "
                     "but %d found instead."
                     % (STRESS_COUNT, len(docman._search()))))

        # These two clients are local to this test; register them for
        # cleanup so they are closed even when an assertion below fails.
        primary_conn = self.repl_set.primary.client()
        self.addCleanup(close_client, primary_conn)
        self.repl_set.primary.stop(destroy=False)
        new_primary_conn = self.repl_set.secondary.client()
        self.addCleanup(close_client, new_primary_conn)

        new_primary_admin = new_primary_conn.admin
        assert_soon(
            lambda: retry_until_ok(new_primary_admin.command,
                                   "isMaster")['ismaster'])

        # These writes only reach the promoted secondary and are expected
        # to be rolled back.
        retry_until_ok(c.insert_many,
                       [{'i': str(STRESS_COUNT + i)}
                        for i in range(STRESS_COUNT)])

        self.repl_set.secondary.stop(destroy=False)

        self.repl_set.primary.start()

        old_primary_admin = primary_conn.admin
        assert_soon(
            lambda: retry_until_ok(old_primary_admin.command,
                                   "isMaster")['ismaster'])
        self.repl_set.secondary.start()

        assert_soon(lambda: retry_until_ok(c.count) == STRESS_COUNT)
        assert_soon(docman_has_all,
                    ("Was expecting %d documents in DocManager, "
                     "but %d found instead."
                     % (STRESS_COUNT, len(docman._search()))))

        self.opman.join()
class TestOplogManager(unittest.TestCase):
    """Tests for OplogThread with include/exclude namespace sets.

    setUp starts a single-node replica set; the OplogThread under test is
    built on demand by reset_opman(), which also builds the DestMapping
    it is configured with.
    """

    # Namespace sets shared by several tests.
    INCLUDE_NS = ["includedb1.*", "includedb2.includecol1"]
    EXCLUDE_NS = ["includedb2.excludecol2", "excludedb3.*"]

    def setUp(self):
        """Start the replica set and open a client to it."""
        self.repl_set = ReplicaSetSingle().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']

    def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
        """(Re)create self.dest_mapping_stru and self.opman.

        Each argument defaults to an empty list/dict when None. The new
        OplogThread gets a fresh DocManager and is not started.
        """
        include_ns = [] if include_ns is None else include_ns
        exclude_ns = [] if exclude_ns is None else exclude_ns
        dest_mapping = {} if dest_mapping is None else dest_mapping

        # include_ns must not exist together with exclude_ns
        # dest_mapping must exist together with include_ns
        # those checks have been tested in test_config.py so we skip that here.
        self.dest_mapping_stru = DestMapping(include_ns, exclude_ns,
                                             dest_mapping)
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(DocManager(), ),
                                 oplog_progress_dict=LockingDict(),
                                 dest_mapping_stru=self.dest_mapping_stru,
                                 ns_set=include_ns,
                                 ex_ns_set=exclude_ns)

    def init_dbs(self):
        """Insert two documents into each of the five test collections."""
        seed = [
            # includedb1.* & includedb2.includecol1 are interested collections
            ("includedb1", "includecol1", "idb1col1"),
            ("includedb1", "includecol2", "idb1col2"),
            ("includedb2", "includecol1", "idb2col1"),
            # the others are not interested collections
            ("includedb2", "excludecol2", "idb2col2"),
            ("excludedb3", "excludecol1", "idb3col1"),
        ]
        for db, coll, key in seed:
            self.primary_conn[db][coll].insert_many(
                [{key: i} for i in range(1, 3)])

    def tearDown(self):
        """Stop the OplogThread (if any), drop test data, stop the set."""
        # self.opman is only assigned by reset_opman(). A test that fails
        # before calling it must not prevent the cleanup below from running.
        opman = getattr(self, "opman", None)
        if opman is not None:
            try:
                opman.join()
            except RuntimeError:
                pass  # OplogThread may not have been started
        for db in self.primary_conn.database_names():
            if db != "local":
                self.primary_conn.drop_database(db)
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''
        # Put something in the dbs
        self.init_dbs()

        # timestamp is None - all oplog entries excluding no-ops are returned.
        # Neither a wildcard include set nor a wildcard exclude set
        # changes that result.
        for include_ns, exclude_ns in ((self.INCLUDE_NS, []),
                                       ([], self.EXCLUDE_NS)):
            self.reset_opman(include_ns, exclude_ns, {})
            got_cursor = self.opman.get_oplog_cursor(None)
            oplog_cursor = self.oplog_coll.find({'op': {'$ne': 'n'}})
            self.assertIsNotNone(got_cursor)
            self.assertEqual(got_cursor.count(), oplog_cursor.count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "idb1col1": 1}
        self.primary_conn["includedb1"]["includecol1"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertIsNotNone(cursor)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)['o']['_id']
        retrieved = self.primary_conn.includedb1.includecol1.find_one(
            next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["includedb1"]["includecol1"].insert_many(
            [{"idb1col1": i} for i in range(2, 1002)])
        oplog_cursor = self.oplog_coll.find(
            {
                'op': {'$ne': 'n'},
                'ns': {'$not': re.compile(r'\.(system|\$cmd)')}
            },
            sort=[("ts", pymongo.ASCENDING)])

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 11 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 11 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""
        # empty oplog case has been tested in test_oplog_manager.py,
        # skip that here.

        # Put something in the dbs
        self.init_dbs()

        # Test non-empty oplog
        self.reset_opman(self.INCLUDE_NS, [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["includedb1"]["includecol1"].insert_one(
                {"idb1col1": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        newest = oplog.find({'op': {'$ne': 'n'}}).sort(
            "$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), newest["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. no namespace set is set
        2. include namespace set is set
        3. exclude namespace set is set

        empty oplog case has been tested in test_oplog_manager.py,
        skip that here.
        """
        # Put something in the dbs
        self.init_dbs()

        cases = [
            # (include set, exclude set, documents expected in doc manager)
            ([], [], 10),               # no namespace set is set
            (self.INCLUDE_NS, [], 6),   # include namespace set is set
            ([], self.EXCLUDE_NS, 6),   # exclude namespace set is set
        ]
        for include_ns, exclude_ns, expected in cases:
            self.reset_opman(include_ns, exclude_ns, {})
            self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
            last_ts = self.opman.get_last_oplog_timestamp()
            self.assertEqual(last_ts, self.opman.dump_collection())
            self.assertEqual(len(self.opman.doc_managers[0]._search()),
                             expected)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """
        self.reset_opman(self.INCLUDE_NS, [], {})
        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        # Documents 50..59 carry the marker and are the ones expected to be
        # missing from the doc manager afterwards.
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['includedb1']['includecol3'].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())

        dumped = sorted(
            (doc for doc in self.opman.doc_managers[0]._search()
             if 'a' in doc),
            key=lambda doc: doc['a'])

        self.assertEqual(len(dumped), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(dumped, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """
        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects
        self.reset_opman(self.INCLUDE_NS, [], {})

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["includedb1"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertIsNone(cursor)
        self.assertTrue(cursor_empty)
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["includedb1"]["includecol1"]
        collection.insert_one({"idb1col1": 1})
        collection.delete_one({"idb1col1": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"idb1col1": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        last_doc = None
        for doc in cursor:
            last_doc = doc
        # Fail with a clear assertion rather than an unbound name if the
        # cursor yielded nothing.
        self.assertIsNotNone(last_doc)
        self.assertEqual(last_doc['o']['idb1col1'], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists
        collection.insert_many([{"idb1col1": i + 500} for i in range(1000)])
        entry = list(self.primary_conn["local"]["oplog.rs"].find(
            skip=200, limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertIsNone(cursor)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """
        source_ns_wildcard = ["includedb1.*", "includedb2.includecol1"]
        source_ns = [
            "includedb1.includecol1", "includedb1.includecol2",
            "includedb2.includecol1"
        ]
        phony_ns = ["includedb2.excludecol2", "excludedb3.excludecol1"]
        dest_mapping = {
            "includedb1.*": "newdb1_*.bar",
            "includedb2.includecol1": "newdb2.newcol1"
        }
        self.reset_opman(source_ns_wildcard, [], dest_mapping)
        docman = self.opman.doc_managers[0]
        dest_mapping_stru = self.opman.dest_mapping_stru
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}
        update_spec = {"$set": {"weakness": "kryptonite"}}

        def update_complete():
            return any(d.get("weakness") == "kryptonite"
                       for d in docman._search())

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)
            source_coll = self.primary_conn[db][coll]

            # test insert
            source_coll.insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            source_coll.update_one({"_id": 1}, update_spec)

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            source_coll.delete_one({"_id": 1})

            assert_soon(lambda: len(docman._search()) == 0)
            bad = [
                d for d in docman._search()
                if d["ns"] == dest_mapping_stru.get(ns)
            ]
            self.assertEqual(len(bad), 0)

            # cleanup
            source_coll.delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one({"_id": 1}, update_spec)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        self.reset_opman(["includedb1.*"], [], {})
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        def total_docs():
            return sum(len(d._search()) for d in doc_managers)

        # start replicating
        self.opman.start()
        self.primary_conn["includedb1"]["includecol1"].insert_one({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["includedb1"]["includecol2"].insert_one({
            "name": "elmo",
            "color": "firetruck red"
        })
        self.primary_conn["excludedb2"]["excludecol1"].insert_one({
            "name": "panda",
            "color": "white and black"
        })

        # Two included documents in each of the three doc managers.
        assert_soon(
            lambda: total_docs() == 6,
            "OplogThread should be able to replicate to multiple targets")

        self.primary_conn["includedb1"]["includecol2"].delete_one(
            {"name": "elmo"})

        assert_soon(
            lambda: total_docs() == 3,
            "OplogThread should be able to replicate to multiple targets")
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")
class TestRollbacks(unittest.TestCase):
    """Rollback tests driven through start_replica_set / kill_mongo_proc.

    Each test stops the primary, writes through the promoted secondary,
    stops that node too, restarts the original primary first and then
    compares the doc manager(s) with the documents left in MongoDB.
    """

    def tearDown(self):
        """Close the clients opened in setUp and kill all mongo processes."""
        for client in (self.main_conn, self.primary_conn,
                       self.secondary_conn):
            client.close()
        kill_all()

    def setUp(self):
        """Start the 'rollbacks' replica set and build the OplogThread.

        The OplogThread is created but not started; each test starts it.
        """
        # Create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # Start a replica set
        _, self.secondary_p, self.primary_p = start_replica_set('rollbacks')
        primary_address = '%s:%d' % (mongo_host, self.primary_p)
        # Connection to the replica set as a whole
        self.main_conn = MongoClient(primary_address,
                                     replicaSet='rollbacks')
        # Connection to the primary specifically
        self.primary_conn = MongoClient(primary_address)
        # Connection to the secondary specifically
        self.secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.secondary_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        self.opman = OplogThread(
            primary_conn=self.main_conn,
            main_address=primary_address,
            oplog_coll=self.main_conn["local"]["oplog.rs"],
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=["test.mc"],
            auth_key=None,
            auth_username=None,
            repl_set="rollbacks"
        )

    def test_single_target(self):
        """Test with a single replication target"""
        self.opman.start()

        # Insert first document with primary up
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 1)

        # Make sure the insert is replicated
        secondary = self.secondary_conn
        self.assertTrue(wait_for(lambda: secondary["test"]["mc"].count() == 1),
                        "first write didn't replicate to secondary")

        # Kill the primary
        kill_mongo_proc(self.primary_p, destroy=False)

        # Wait for the secondary to be promoted
        while not secondary["admin"].command("isMaster")["ismaster"]:
            time.sleep(1)

        # Insert another document. This will be rolled back later
        retry_until_ok(self.main_conn["test"]["mc"].insert, {"i": 1})
        self.assertEqual(secondary["test"]["mc"].count(), 2)

        # Wait for replication to doc manager
        self.assertTrue(
            wait_for(lambda: len(self.opman.doc_managers[0]._search()) == 2),
            "not all writes were replicated to doc manager")

        # Kill the new primary
        kill_mongo_proc(self.secondary_p, destroy=False)

        # Start both servers back up
        restart_mongo_proc(self.primary_p)
        primary_admin = self.primary_conn["admin"]
        while not primary_admin.command("isMaster")["ismaster"]:
            time.sleep(1)
        restart_mongo_proc(self.secondary_p)
        # The secondary was only just restarted, so the status command goes
        # through retry_until_ok, exactly as test_many_targets does.
        while retry_until_ok(secondary["admin"].command,
                             "replSetGetStatus")["myState"] != 2:
            time.sleep(1)
        while retry_until_ok(self.main_conn["test"]["mc"].find().count) == 0:
            time.sleep(1)

        # Only first document should exist in MongoDB
        self.assertEqual(self.main_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.main_conn["test"]["mc"].find_one()["i"], 0)

        # Same case should hold for the doc manager
        doc_manager = self.opman.doc_managers[0]
        self.assertEqual(len(doc_manager._search()), 1)
        self.assertEqual(doc_manager._search()[0]["i"], 0)

        # cleanup
        self.opman.join()

    def test_many_targets(self):
        """Test with several replication targets"""
        # OplogThread has multiple doc managers
        self.opman.doc_managers = [DocManager(), DocManager(), DocManager()]

        self.opman.start()

        # Insert a document into each namespace
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)

        # Make sure the insert is replicated
        secondary = self.secondary_conn
        self.assertTrue(wait_for(lambda: secondary["test"]["mc"].count() == 1),
                        "first write didn't replicate to secondary")

        # Kill the primary
        kill_mongo_proc(self.primary_p, destroy=False)

        # Wait for the secondary to be promoted
        while not secondary["admin"].command("isMaster")["ismaster"]:
            time.sleep(1)

        # Insert more documents. This will be rolled back later
        # Some of these documents will be manually removed from
        # certain doc managers, to emulate the effect of certain
        # target systems being ahead/behind others
        secondary_ids = [
            retry_until_ok(self.main_conn["test"]["mc"].insert, {"i": i})
            for i in range(1, 10)
        ]
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 10)

        # Wait for replication to the doc managers
        def docmans_done():
            return all(len(dm._search()) == 10
                       for dm in self.opman.doc_managers)

        self.assertTrue(wait_for(docmans_done),
                        "not all writes were replicated to doc managers")

        # Remove some documents from the doc managers to simulate
        # uneven replication
        for doc_id in secondary_ids[8:]:
            self.opman.doc_managers[1].remove({"_id": doc_id})
        for doc_id in secondary_ids[2:]:
            self.opman.doc_managers[2].remove({"_id": doc_id})

        # Kill the new primary
        kill_mongo_proc(self.secondary_p, destroy=False)

        # Start both servers back up
        restart_mongo_proc(self.primary_p)
        primary_admin = self.primary_conn["admin"]
        while not primary_admin.command("isMaster")["ismaster"]:
            time.sleep(1)
        restart_mongo_proc(self.secondary_p)
        while retry_until_ok(secondary["admin"].command,
                             "replSetGetStatus")["myState"] != 2:
            time.sleep(1)
        while retry_until_ok(
                self.primary_conn["test"]["mc"].find().count) == 0:
            time.sleep(1)

        # Only first document should exist in MongoDB
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.primary_conn["test"]["mc"].find_one()["i"], 0)

        # Give OplogThread some time to catch up
        time.sleep(10)

        # Same case should hold for the doc managers
        for dm in self.opman.doc_managers:
            self.assertEqual(len(dm._search()), 1)
            self.assertEqual(dm._search()[0]["i"], 0)

        self.opman.join()
class TestRollbacks(unittest.TestCase):
    """Rollback tests for an OplogThread, using the legacy insert/remove API.

    Each test stops the primary, writes (or deletes) through the promoted
    secondary, stops that node too, restarts the original primary first
    and then compares the doc manager(s) with what MongoDB holds.
    """

    def tearDown(self):
        """Close the clients opened in setUp, then stop the replica set."""
        # NOTE(review): assumes the objects returned by .client() expose
        # close() like pymongo.MongoClient does; confirm against the
        # ReplicaSet helper.
        for client in (self.main_conn, self.primary_conn,
                       self.secondary_conn):
            client.close()
        self.repl_set.stop()

    def setUp(self):
        """Start a replica set, open the clients and build the OplogThread.

        The OplogThread is created but not started; each test starts it.
        """
        # Create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Start a replica set
        self.repl_set = ReplicaSet().start()
        # Connection to the replica set as a whole
        self.main_conn = self.repl_set.client()
        # Connection to the primary specifically
        self.primary_conn = self.repl_set.primary.client()
        # Connection to the secondary specifically
        self.secondary_conn = self.repl_set.secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        self.opman = OplogThread(primary_client=self.main_conn,
                                 doc_managers=(DocManager(), ),
                                 oplog_progress_dict=LockingDict(),
                                 ns_set=["test.mc"])

    def test_single_target(self):
        """Test with a single replication target"""
        self.opman.start()
        coll = self.main_conn["test"]["mc"]

        # Insert first document with primary up
        coll.insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 1)

        # Make sure the insert is replicated
        secondary = self.secondary_conn
        assert_soon(lambda: secondary["test"]["mc"].count() == 1,
                    "first write didn't replicate to secondary")

        # Kill the primary
        self.repl_set.primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        assert_soon(lambda: secondary["admin"].command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        retry_until_ok(coll.insert, {"i": 1})
        self.assertEqual(secondary["test"]["mc"].count(), 2)

        # Wait for replication to doc manager
        assert_soon(lambda: len(self.opman.doc_managers[0]._search()) == 2,
                    "not all writes were replicated to doc manager")

        # Kill the new primary
        self.repl_set.secondary.stop(destroy=False)

        # Start both servers back up
        self.repl_set.primary.start()
        primary_admin = self.primary_conn["admin"]
        assert_soon(lambda: primary_admin.command("isMaster")["ismaster"],
                    "restarted primary never resumed primary status")
        self.repl_set.secondary.start()
        assert_soon(
            lambda: retry_until_ok(secondary.admin.command,
                                   'replSetGetStatus')['myState'] == 2,
            "restarted secondary never resumed secondary status")
        assert_soon(
            lambda: retry_until_ok(self.main_conn.test.mc.find().count) > 0,
            "documents not found after primary/secondary restarted")

        # Only first document should exist in MongoDB
        self.assertEqual(coll.count(), 1)
        self.assertEqual(coll.find_one()["i"], 0)

        # Same case should hold for the doc manager
        doc_manager = self.opman.doc_managers[0]
        assert_soon(lambda: len(doc_manager._search()) == 1,
                    'documents never rolled back in doc manager.')
        self.assertEqual(doc_manager._search()[0]["i"], 0)

        # cleanup
        self.opman.join()

    def test_many_targets(self):
        """Test with several replication targets"""
        # OplogThread has multiple doc managers
        self.opman.doc_managers = [DocManager(), DocManager(), DocManager()]

        self.opman.start()

        # Insert a document into each namespace
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)

        # Make sure the insert is replicated
        secondary = self.secondary_conn
        assert_soon(lambda: secondary["test"]["mc"].count() == 1,
                    "first write didn't replicate to secondary")

        # Kill the primary
        self.repl_set.primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        assert_soon(lambda: secondary.admin.command("isMaster")['ismaster'],
                    'secondary was never promoted')

        # Insert more documents. This will be rolled back later
        # Some of these documents will be manually removed from
        # certain doc managers, to emulate the effect of certain
        # target systems being ahead/behind others
        secondary_ids = [
            retry_until_ok(self.main_conn["test"]["mc"].insert, {"i": i})
            for i in range(1, 10)
        ]
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 10)

        # Wait for replication to the doc managers
        def docmans_done():
            return all(len(dm._search()) == 10
                       for dm in self.opman.doc_managers)

        assert_soon(docmans_done,
                    "not all writes were replicated to doc managers")

        # Remove some documents from the doc managers to simulate
        # uneven replication
        ts = self.opman.doc_managers[0].get_last_doc()['_ts']
        for doc_id in secondary_ids[8:]:
            self.opman.doc_managers[1].remove(doc_id, 'test.mc', ts)
        for doc_id in secondary_ids[2:]:
            self.opman.doc_managers[2].remove(doc_id, 'test.mc', ts)

        # Kill the new primary
        self.repl_set.secondary.stop(destroy=False)

        # Start both servers back up
        self.repl_set.primary.start()
        primary_admin = self.primary_conn["admin"]
        assert_soon(lambda: primary_admin.command("isMaster")['ismaster'],
                    'restarted primary never resumed primary status')
        self.repl_set.secondary.start()
        assert_soon(
            lambda: retry_until_ok(secondary.admin.command,
                                   'replSetGetStatus')['myState'] == 2,
            "restarted secondary never resumed secondary status")
        assert_soon(
            lambda: retry_until_ok(self.primary_conn.test.mc.find().count) > 0,
            "documents not found after primary/secondary restarted")

        # Only first document should exist in MongoDB
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.primary_conn["test"]["mc"].find_one()["i"], 0)

        # Give OplogThread some time to catch up
        time.sleep(10)

        # Same case should hold for the doc managers
        for dm in self.opman.doc_managers:
            self.assertEqual(len(dm._search()), 1)
            self.assertEqual(dm._search()[0]["i"], 0)

        self.opman.join()

    def test_deletions(self):
        """Test rolling back 'd' operations"""
        self.opman.start()

        # Insert a document, wait till it replicates to secondary
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.main_conn["test"]["mc"].insert({"i": 1})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 2)
        assert_soon(lambda: self.secondary_conn["test"]["mc"].count() == 2,
                    "first write didn't replicate to secondary")

        # Kill the primary, wait for secondary to be promoted
        self.repl_set.primary.stop(destroy=False)
        assert_soon(
            lambda: self.secondary_conn["admin"].command(
                "isMaster")["ismaster"])

        # Delete first document
        retry_until_ok(self.main_conn["test"]["mc"].remove, {"i": 0})
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 1)

        # Wait for replication to doc manager
        assert_soon(lambda: len(self.opman.doc_managers[0]._search()) == 1,
                    "delete was not replicated to doc manager")

        # Kill the new primary
        self.repl_set.secondary.stop(destroy=False)

        # Start both servers back up
        self.repl_set.primary.start()
        primary_admin = self.primary_conn["admin"]
        assert_soon(lambda: primary_admin.command("isMaster")["ismaster"],
                    "restarted primary never resumed primary status")
        self.repl_set.secondary.start()
        assert_soon(
            lambda: retry_until_ok(self.secondary_conn.admin.command,
                                   'replSetGetStatus')['myState'] == 2,
            "restarted secondary never resumed secondary status")

        # Both documents should exist in mongo
        assert_soon(
            lambda: retry_until_ok(self.main_conn["test"]["mc"].count) == 2)

        # Both document should exist in doc manager
        doc_manager = self.opman.doc_managers[0]
        # NOTE: the message is formatted before the wait starts, so it
        # shows the doc manager contents at call time, not at timeout.
        assert_soon(lambda: len(list(doc_manager._search())) == 2,
                    ("Expected two documents, but got: %r"
                     % list(doc_manager._search())))

        self.opman.join()

    def test_stressed_rollback(self):
        """Stress test for a rollback with many documents."""
        self.opman.start()

        c = self.main_conn.test.mc
        docman = self.opman.doc_managers[0]

        def docman_has_all():
            return len(docman._search()) == STRESS_COUNT

        c.insert(({'i': i} for i in range(STRESS_COUNT)), w=2)
        assert_soon(lambda: c.count() == STRESS_COUNT)
        # NOTE: the count in the message is sampled before the wait starts.
        assert_soon(docman_has_all,
                    ("Was expecting %d documents in DocManager, "
                     "but %d found instead."
                     % (STRESS_COUNT, len(docman._search()))))

        # These two clients are local to this test; register them for
        # cleanup so they are closed even when an assertion below fails.
        primary_conn = self.repl_set.primary.client()
        self.addCleanup(primary_conn.close)
        self.repl_set.primary.stop(destroy=False)
        new_primary_conn = self.repl_set.secondary.client()
        self.addCleanup(new_primary_conn.close)

        new_primary_admin = new_primary_conn.admin
        assert_soon(
            lambda: retry_until_ok(new_primary_admin.command,
                                   "isMaster")['ismaster'])

        # These writes only reach the promoted secondary and are expected
        # to be rolled back.
        retry_until_ok(c.insert,
                       [{'i': str(STRESS_COUNT + i)}
                        for i in range(STRESS_COUNT)])

        self.repl_set.secondary.stop(destroy=False)

        self.repl_set.primary.start()

        old_primary_admin = primary_conn.admin
        assert_soon(
            lambda: retry_until_ok(old_primary_admin.command,
                                   "isMaster")['ismaster'])
        self.repl_set.secondary.start()

        assert_soon(lambda: retry_until_ok(c.count) == STRESS_COUNT)
        assert_soon(docman_has_all,
                    ("Was expecting %d documents in DocManager, "
                     "but %d found instead."
                     % (STRESS_COUNT, len(docman._search()))))

        self.opman.join()
class TestOplogManager(unittest.TestCase):
    """Integration tests for OplogThread against a live replica set.

    Each test gets a freshly started replica set named 'test-oplog-manager'
    and an OplogThread built with a single DocManager (see setUp).
    """

    def setUp(self):
        """Start the replica set and build the OplogThread under test."""
        _, _, self.primary_p = start_replica_set('test-oplog-manager')
        self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
        self.oplog_coll = self.primary_conn.local['oplog.rs']
        self.opman = OplogThread(
            primary_conn=self.primary_conn,
            main_address='%s:%d' % (mongo_host, self.primary_p),
            oplog_coll=self.oplog_coll,
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=None,
            auth_key=None,
            auth_username=None,
            repl_set='test-oplog-manager'
        )

    def tearDown(self):
        """Stop the OplogThread and always tear the replica set down."""
        try:
            try:
                self.opman.join()
            except RuntimeError:
                pass                # OplogThread may not have been started
            self.primary_conn.close()
        finally:
            # Must run even if join()/close() raise; otherwise the replica
            # set outlives the test and breaks every test that follows.
            kill_replica_set('test-oplog-manager')

    def test_retrieve_doc(self):
        """ Test the retrieve_doc method """

        # Trivial case where the oplog entry is None
        self.assertIsNone(self.opman.retrieve_doc(None))

        # Retrieve a document from insert operation in oplog
        doc = {"name": "mango", "type": "fruit",
               "ns": "test.test", "weight": 3.24, "i": 1}
        self.primary_conn["test"]["test"].insert(doc)
        oplog_entries = self.primary_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        oplog_entry = next(oplog_entries)
        self.assertEqual(self.opman.retrieve_doc(oplog_entry), doc)

        # Retrieve a document from update operation in oplog
        self.primary_conn["test"]["test"].update(
            {"i": 1},
            {"$set": {"sounds-like": "mongo"}}
        )
        oplog_entries = self.primary_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        doc["sounds-like"] = "mongo"
        self.assertEqual(self.opman.retrieve_doc(next(oplog_entries)), doc)

        # Retrieve a document from remove operation in oplog
        # (expected: None)
        self.primary_conn["test"]["test"].remove({"i": 1})
        oplog_entries = self.primary_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        self.assertIsNone(self.opman.retrieve_doc(next(oplog_entries)))

        # Retrieve a document with bad _id
        # (expected: None)
        # Re-uses the insert entry fetched above with its _id overwritten.
        oplog_entry["o"]["_id"] = "ThisIsNotAnId123456789"
        self.assertIsNone(self.opman.retrieve_doc(oplog_entry))

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''

        # Trivial case: timestamp is None
        self.assertIsNone(self.opman.get_oplog_cursor(None))

        # earliest entry is after given timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert(doc)
        self.assertIsNone(
            self.opman.get_oplog_cursor(bson.Timestamp(1, 0)))

        # earliest entry is the only one at/after timestamp
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertIsNotNone(cursor)
        self.assertEqual(cursor.count(), 1)
        self.assertEqual(self.opman.retrieve_doc(next(cursor)), doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert(
            {"i": i} for i in range(2, 1002))
        oplog_cursor = self.oplog_coll.find(
            sort=[("ts", pymongo.ASCENDING)]
        )

        # startup + insert + 1000 inserts
        self.assertEqual(oplog_cursor.count(), 2 + 1000)
        pivot = oplog_cursor.skip(400).limit(1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 2 + 1000 - 400)

        # get_oplog_cursor fast-forwards *one doc beyond* the given timestamp
        doc = self.primary_conn["test"]["test"].find_one(
            {"_id": next(goc_cursor)["o"]["_id"]})
        self.assertEqual(doc["i"], self.opman.retrieve_doc(pivot)["i"] + 1)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertIsNone(self.opman.get_last_oplog_timestamp())

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertIsNone(last_ts)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. last checkpoint exists
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertIsNone(self.opman.init_cursor())
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        self.assertIsNone(self.opman.init_cursor())
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["test"]["test"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman.init_cursor())["ts"], last_ts)
        self.assertEqual(self.opman.checkpoint, last_ts)
        # Progress is keyed by str() of the oplog collection in this version.
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, non-empty collections, stuff in oplog
        self.opman.oplog_progress = LockingDict()
        self.assertEqual(next(self.opman.init_cursor())["ts"], last_ts)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        # Two consecutive oplog entries: entry[0] becomes the stored
        # checkpoint, entry[1] is what the cursor is expected to yield first.
        entry = list(
            self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor = self.opman.init_cursor()
        self.assertEqual(entry[1]["ts"], next(cursor)["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)],
                             entry[0]["ts"])

    def test_filter_fields(self):
        """Test that dump_collection only passes on the configured fields."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.main_connection

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {
            "a": 1, "b": 2, "c": 3,
            "d": 4, "e": 5, "f": 6,
            "_id": 1
        }
        db = conn['test']['test']
        db.insert(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {"test.test1": "test.test1_dest",
                        "test.test2": "test.test2_dest"}
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        def update_complete():
            # True once any replicated document carries the updated field.
            return any(d.get("weakness") == "kryptonite"
                       for d in docman._search())

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].remove({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search()
                   if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].remove()
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """

        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["test"]["test"].insert({
            "name": "elmo",
            "color": "firetruck red"
        })

        # 2 documents x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets"
        )

        self.primary_conn["test"]["test"].remove({"name": "elmo"})

        # 1 remaining document x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets"
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")
class TestOplogManager(unittest.TestCase):
    """Integration tests for OplogThread with wildcard namespace filters.

    setUp only starts the replica set; every test builds its own
    OplogThread through reset_opman() with the include/exclude namespaces
    and destination mapping it needs.
    """

    def setUp(self):
        """Start a single-node replica set and connect to it."""
        # Created lazily by reset_opman(); defined here so tearDown can tell
        # "never created" apart from "created but never started".
        self.opman = None
        self.repl_set = ReplicaSetSingle().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']

    def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
        """Replace self.opman with a fresh OplogThread.

        include_ns / exclude_ns default to empty lists and dest_mapping to
        an empty dict. They are handed to DestMapping and to the OplogThread
        as ns_set / ex_ns_set.
        """
        if include_ns is None:
            include_ns = []
        if exclude_ns is None:
            exclude_ns = []
        if dest_mapping is None:
            dest_mapping = {}

        # include_ns must not exist together with exclude_ns
        # dest_mapping must exist together with include_ns
        # those checks have been tested in test_config.py so we skip that here.
        self.dest_mapping_stru = DestMapping(include_ns, exclude_ns,
                                             dest_mapping)
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            ns_set=include_ns,
            ex_ns_set=exclude_ns
        )

    def init_dbs(self):
        """Insert two documents into each of five test collections."""
        # includedb1.* & includedb2.includecol1 are interested collections
        self.primary_conn["includedb1"]["includecol1"].insert_many(
            [{"idb1col1": i} for i in range(1, 3)])
        self.primary_conn["includedb1"]["includecol2"].insert_many(
            [{"idb1col2": i} for i in range(1, 3)])
        self.primary_conn["includedb2"]["includecol1"].insert_many(
            [{"idb2col1": i} for i in range(1, 3)])
        # the others are not interested collections
        self.primary_conn["includedb2"]["excludecol2"].insert_many(
            [{"idb2col2": i} for i in range(1, 3)])
        self.primary_conn["excludedb3"]["excludecol1"].insert_many(
            [{"idb3col1": i} for i in range(1, 3)])

    def tearDown(self):
        """Stop the OplogThread, drop test data, always stop the replica set."""
        try:
            # A test may fail before reset_opman() ever ran.
            if self.opman is not None:
                try:
                    self.opman.join()
                except RuntimeError:
                    pass        # OplogThread may not have been started
            for db in self.primary_conn.database_names():
                if db != "local":
                    self.primary_conn.drop_database(db)
            close_client(self.primary_conn)
        finally:
            # Must run even if the cleanup above raises, otherwise the
            # replica set is left running after the test.
            self.repl_set.stop()

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''

        # Put something in the dbs
        self.init_dbs()

        # timestamp is None - all oplog entries excluding no-ops are returned.
        # wildcard include case no impact the result
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        got_cursor = self.opman.get_oplog_cursor(None)
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'}})
        self.assertIsNotNone(got_cursor)
        self.assertEqual(got_cursor.count(), oplog_cursor.count())

        # wildcard exclude case no impact the result
        self.reset_opman([], ["includedb2.excludecol2", "excludedb3.*"], {})
        got_cursor = self.opman.get_oplog_cursor(None)
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'}})
        self.assertIsNotNone(got_cursor)
        self.assertEqual(got_cursor.count(), oplog_cursor.count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "idb1col1": 1}
        self.primary_conn["includedb1"]["includecol1"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertIsNotNone(cursor)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)['o']['_id']
        retrieved = self.primary_conn.includedb1.includecol1.find_one(
            next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["includedb1"]["includecol1"].insert_many(
            [{"idb1col1": i} for i in range(2, 1002)])
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.(system|\$cmd)')}},
            sort=[("ts", pymongo.ASCENDING)])

        # initial insert + 1000 more inserts
        # (11 = 10 documents from init_dbs + the single insert above)
        self.assertEqual(oplog_cursor.count(), 11 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 11 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # empty oplog case has been tested in test_oplog_manager.py,
        # skip that here.
        # Put something in the dbs
        self.init_dbs()

        # Test non-empty oplog
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["includedb1"]["includecol1"].insert_one({
                "idb1col1": i + 500
            })
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find(
            {'op': {'$ne': 'n'}}).sort(
            "$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. no namespace set is set
        2. include namespace set is set
        3. exclude namespace set is set

        empty oplog case has been tested in test_oplog_manager.py,
        skip that here.
        """

        # Put something in the dbs
        self.init_dbs()

        # no namespace set is set
        self.reset_opman([], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        # all 5 collections x 2 documents
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 10)

        # include namespace set is set
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        # 3 included collections x 2 documents
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 6)

        # exclude namespace set is set
        self.reset_opman([], ["includedb2.excludecol2", "excludedb3.*"], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 6)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        # Documents 50..59 carry the marker key; the assertions below expect
        # exactly these ten to be missing from the doc manager.
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['includedb1']['includecol3'].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())

        docs = self.opman.doc_managers[0]._search()
        docs = list(filter(lambda doc: 'a' in doc, docs))
        docs.sort(key=lambda doc: doc['a'])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["includedb1"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman.init_cursor()[0]))
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertIsNone(cursor)
        self.assertTrue(cursor_empty)
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["includedb1"]["includecol1"]
        collection.insert_one({"idb1col1": 1})
        collection.delete_one({"idb1col1": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"idb1col1": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        # Drain the cursor and keep the final entry; start from None so an
        # empty cursor fails the assertion instead of raising NameError.
        last_doc = None
        for doc in cursor:
            last_doc = doc
        self.assertIsNotNone(last_doc,
                             "init_cursor yielded no oplog entries")
        self.assertEqual(last_doc['o']['idb1col1'], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists
        collection.insert_many([{"idb1col1": i + 500} for i in range(1000)])
        # Two consecutive oplog entries: entry[0] becomes the checkpoint,
        # entry[1] is what the cursor is expected to yield first.
        entry = list(
            self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertIsNone(cursor)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns_wildcard = ["includedb1.*", "includedb2.includecol1"]
        source_ns = ["includedb1.includecol1",
                     "includedb1.includecol2",
                     "includedb2.includecol1"]
        phony_ns = ["includedb2.excludecol2", "excludedb3.excludecol1"]
        dest_mapping = {
            "includedb1.*": "newdb1_*.bar",
            "includedb2.includecol1": "newdb2.newcol1"
        }
        self.reset_opman(source_ns_wildcard, [], dest_mapping)
        docman = self.opman.doc_managers[0]
        dest_mapping_stru = self.opman.dest_mapping_stru
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        def update_complete():
            # True once any replicated document carries the updated field.
            return any(d.get("weakness") == "kryptonite"
                       for d in docman._search())

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search()
                   if d["ns"] == dest_mapping_stru.get(ns)]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        self.reset_opman(["includedb1.*"], [], {})
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["includedb1"]["includecol1"].insert_one({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["includedb1"]["includecol2"].insert_one({
            "name": "elmo",
            "color": "firetruck red"
        })
        # Written outside "includedb1.*"; the counts below only add up if
        # this document is not replicated.
        self.primary_conn["excludedb2"]["excludecol1"].insert_one({
            "name": "panda",
            "color": "white and black"
        })

        # 2 included documents x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets"
        )

        self.primary_conn["includedb1"]["includecol2"].delete_one({
            "name": "elmo"
        })

        # 1 remaining document x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets"
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")
class TestOplogManager(unittest.TestCase):
    """Integration tests for OplogThread against a live replica set.

    Each test gets a freshly started ReplicaSet and an OplogThread built
    with a single DocManager and an empty progress dict (see setUp).
    """

    def setUp(self):
        """Start the replica set and build the OplogThread under test."""
        self.repl_set = ReplicaSet().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict()
        )

    def tearDown(self):
        """Stop the OplogThread, drop test data, always stop the replica set."""
        try:
            try:
                self.opman.join()
            except RuntimeError:
                pass                # OplogThread may not have been started
            self.primary_conn.drop_database("test")
            close_client(self.primary_conn)
        finally:
            # Must run even if the cleanup above raises, otherwise the
            # replica set is left running after the test.
            self.repl_set.stop()

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''

        # timestamp is None - all oplog entries are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(cursor.count(),
                         self.primary_conn["local"]["oplog.rs"].count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertIsNotNone(cursor)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)['o']['_id']
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert_many(
            [{"i": i} for i in range(2, 1002)])
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.(system|\$cmd)')}},
            sort=[("ts", pymongo.ASCENDING)]
        )

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 1 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 1 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertIsNone(self.opman.get_last_oplog_timestamp())

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertIsNone(last_ts)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        # Documents 50..59 carry the marker key; the assertions below expect
        # exactly these ten to be missing from the doc manager.
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['test']['test'].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())

        docs = self.opman.doc_managers[0]._search()
        docs.sort(key=lambda doc: doc['a'])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman.init_cursor()[0]))
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertIsNone(cursor)
        self.assertTrue(cursor_empty)
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["test"]["test"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        # Progress is keyed by replica set name in this version.
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[self.opman.replset_name],
                             last_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        # Drain the cursor and keep the final entry; start from None so an
        # empty cursor fails the assertion instead of raising NameError.
        last_doc = None
        for doc in cursor:
            last_doc = doc
        self.assertIsNotNone(last_doc,
                             "init_cursor yielded no oplog entries")
        self.assertEqual(last_doc['o']['i'], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        # Two consecutive oplog entries: entry[0] becomes the stored
        # checkpoint, entry[1] is what the cursor is expected to yield first.
        entry = list(
            self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        progress.get_dict()[self.opman.replset_name] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[self.opman.replset_name],
                             entry[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[self.opman.replset_name] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertIsNone(cursor)
        self.assertIsNotNone(self.opman.checkpoint)

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {"test.test1": "test.test1_dest",
                        "test.test2": "test.test2_dest"}
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        def update_complete():
            # True once any replicated document carries the updated field.
            return any(d.get("weakness") == "kryptonite"
                       for d in docman._search())

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search()
                   if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """

        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert_one({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["test"]["test"].insert_one({
            "name": "elmo",
            "color": "firetruck red"
        })

        # 2 documents x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets"
        )

        self.primary_conn["test"]["test"].delete_one({"name": "elmo"})

        # 1 remaining document x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets"
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_upgrade_oplog_progress(self):
        """Test that update_checkpoint rewrites an old-format progress dict.

        The old format is keyed by str(oplog collection) with a long
        timestamp; the expected result is keyed by replica set name with the
        bson timestamp.
        """
        first_oplog_ts = self.opman.oplog.find_one()['ts']
        # Old format oplog progress file:
        progress = {str(self.opman.oplog): bson_ts_to_long(first_oplog_ts)}
        # Set up oplog managers to use the old format.
        oplog_progress = LockingDict()
        oplog_progress.dict = progress
        self.opman.oplog_progress = oplog_progress
        # Cause the oplog managers to update their checkpoints.
        self.opman.checkpoint = first_oplog_ts
        self.opman.update_checkpoint()
        # New format should be in place now.
        new_format = {self.opman.replset_name: first_oplog_ts}
        self.assertEqual(
            new_format,
            self.opman.oplog_progress.get_dict()
        )
class TestFilterFields(unittest.TestCase):
    """Tests for include/exclude field filtering done by OplogThread.

    One single-node replica set is started for the whole class; every test
    gets a fresh OplogThread with a default NamespaceConfig.
    """

    # NOTE(review): this file contains more than one class named
    # ``TestFilterFields``; a later definition rebinds the name, so confirm
    # which copy is actually meant to run.

    @classmethod
    def setUpClass(cls):
        """Start the replica set and open the shared client."""
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local["oplog.rs"]

    @classmethod
    def tearDownClass(cls):
        """Drop the ``test`` database, close the client, stop the replica set."""
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        """Create an OplogThread with a single DocManager target."""
        self.namespace_config = NamespaceConfig()
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=self.namespace_config,
        )

    def tearDown(self):
        """Join the OplogThread if the test started it."""
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        """Replace the thread's namespace config with one including ``fields``."""
        self.opman.namespace_config = NamespaceConfig(include_fields=fields)

    def reset_exclude_fields(self, fields):
        """Replace the thread's namespace config with one excluding ``fields``."""
        self.opman.namespace_config = NamespaceConfig(exclude_fields=fields)

    def test_filter_fields(self):
        """dump_collection() keeps included fields and drops the others."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        # Fields present in the source document that must not be replicated.
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.reset_include_fields(include_fields)
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        collection = conn["test"]["test"]
        collection.insert_one(doc)
        assert_soon(lambda: collection.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        """filter_oplog_entry() with ``exclude_fields`` on insert/update ops."""

        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def filter_doc(document, fields):
            # "_id" is never passed as an excluded field. Build a new list
            # instead of mutating the caller's.
            if fields and "_id" in fields:
                fields = [name for name in fields if name != "_id"]
            return self.opman.filter_oplog_entry(document, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ["c"])
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ["a", "b", "c"])
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ["b"])
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ["a", "b"])
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ["c"])
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ["a", "b", "c"])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc(
            {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}, ["d", "e", "f"]
        )
        self.assertEqual(filtered, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_filter_oplog_entry(self):
        """filter_oplog_entry() with ``include_fields`` on insert/update ops."""

        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def filter_doc(document, fields):
            # "_id" is always added to a non-empty include list. Build a new
            # list instead of mutating the caller's.
            if fields and "_id" not in fields:
                fields = fields + ["_id"]
            return self.opman.filter_oplog_entry(document, include_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ["a", "b"])
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ["d", "e", "f"])
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ["a", "c"])
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ["c"])
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ["a", "b"])
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ["d", "e", "f"])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc(
            {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}, ["a", "b", "c"]
        )
        self.assertEqual(filtered, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_nested_fields(self):
        """filter_oplog_entry() with dotted (nested) ``include_fields``."""

        def check_nested(document, fields, filtered_document, op="i"):
            # Wrap ``document`` in an oplog entry, filter it, and compare the
            # resulting "o" payload (or None) with ``filtered_document``.
            if "_id" not in fields:
                fields = fields + ["_id"]
            filtered_result = self.opman.filter_oplog_entry(
                {"op": op, "o": document}, include_fields=fields
            )
            if filtered_result is not None:
                filtered_result = filtered_result["o"]
            self.assertEqual(filtered_result, filtered_document)

        # Factories return brand-new nested dicts on every call. The entry is
        # filtered in place, so the expected value must not share any nested
        # dict with the input (a shallow ``dict.copy()`` would, and could hide
        # an unwanted nested mutation).
        def dashed_doc():
            return {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}

        def crossed_doc():
            return {"a": {"b": 1}, "b": {"a": 1}}

        # Included path missing from the document: its branch is dropped.
        check_nested(
            {"name": "Han Solo", "a": {"b": {}}},
            ["name", "a.b.c"],
            {"name": "Han Solo"},
        )

        # Only the listed nested paths survive.
        check_nested(
            {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}},
            ["a.b.c", "a.e"],
            {"a": {"b": {"c": 2}, "e": 5}},
        )

        # Same as above, but the document carries an _id, which is kept.
        check_nested(
            {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}, "_id": 1},
            ["a.b.c", "a.e"],
            {"a": {"b": {"c": 2}, "e": 5}, "_id": 1},
        )

        # Field lists that cover the whole document leave it unchanged.
        check_nested(dashed_doc(), ["a.b", "-a"], dashed_doc())
        check_nested(dashed_doc(), ["a", "-a.-b"], dashed_doc())
        check_nested(
            dict(dashed_doc(), _id=1), ["a.b", "-a"], dict(dashed_doc(), _id=1)
        )
        check_nested(
            dict(dashed_doc(), _id=1), ["a", "-a.-b"], dict(dashed_doc(), _id=1)
        )

        # Nothing matches: the document becomes empty.
        check_nested({"test": 1}, ["doesnt_exist"], {})

        check_nested(crossed_doc(), ["a.b", "b.a"], crossed_doc())

        # "a.b" matches only from the top level, not inside "c".
        check_nested(
            {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}},
            ["a.b"],
            {"a": {"b": {"a": {"b": 1}}}},
        )

        # A field name that is a prefix of another name must not match it.
        check_nested(
            {"name": "anna", "name_of_cat": "pushkin"}, ["name"], {"name": "anna"}
        )

        # Update operations ($set with dotted keys).
        check_nested(
            {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}},
            ["a", "c"],
            {"$set": {"a.b": 1, "a.c": 3, "c": {"b": 3}}},
            op="u",
        )

        check_nested(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}},
            ["a.b.c", "a.e"],
            {"$set": {"a.b": {"c": 3}, "a.e": 1}},
            op="u",
        )

        check_nested(
            {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}},
            ["a.b"],
            {"$set": {"a.b.1": 1, "a.b.2": 2}},
            op="u",
        )

        # Nothing in the update matches: the whole entry is dropped.
        check_nested(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}, ["a.b.e"], None, op="u"
        )

    def test_nested_exclude_fields(self):
        """filter_oplog_entry() with dotted (nested) ``exclude_fields``."""

        def check_nested(document, exclude_fields, filtered_document, op="i"):
            # Wrap ``document`` in an oplog entry, filter it, and compare the
            # resulting "o" payload (or None) with ``filtered_document``.
            if "_id" in exclude_fields:
                exclude_fields = [name for name in exclude_fields if name != "_id"]
            filtered_result = self.opman.filter_oplog_entry(
                {"op": op, "o": document}, exclude_fields=exclude_fields
            )
            if filtered_result is not None:
                filtered_result = filtered_result["o"]
            self.assertEqual(filtered_result, filtered_document)

        def dashed_doc():
            # New nested dicts on every call; the entry is filtered in place.
            return {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}

        check_nested(
            {"a": {"b": {"c": {"d": 0, "e": 1}}}},
            ["a.b.c.d"],
            {"a": {"b": {"c": {"e": 1}}}},
        )

        # Removing the only key of a sub-document leaves an empty dict behind.
        check_nested(
            {"a": {"b": {"c": {"-a": 0, "d": {"e": {"f": 1}}}}}},
            ["a.b.c.d.e.f"],
            {"a": {"b": {"c": {"-a": 0, "d": {"e": {}}}}}},
        )

        check_nested({"a": 1}, ["a"], {})

        check_nested(
            {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}},
            ["a.b.c", "a.e"],
            {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}},
        )

        # "_id" in the exclude list is ignored: the _id stays.
        check_nested(
            {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}, "_id": 1},
            ["a.b.c", "a.e", "_id"],
            {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}, "_id": 1},
        )

        check_nested(dashed_doc(), ["a.b", "-a"], {"a": {}})
        check_nested(dashed_doc(), ["a", "-a.-b"], {"-a": {}})
        check_nested(dict(dashed_doc(), _id=1), ["a.b", "-a"], {"_id": 1, "a": {}})

        # Excluding paths that do not exist changes nothing.
        check_nested({"test": 1}, ["doesnt_exist"], {"test": 1})
        check_nested({"test": 1}, ["test.doesnt_exist"], {"test": 1})

        check_nested(
            {"a": {"b": 1}, "b": {"a": 1}}, ["a.b", "b.a"], {"a": {}, "b": {}}
        )

        # "a.b" matches only from the top level, not inside "c".
        check_nested(
            {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}},
            ["a.b"],
            {"a": {}, "c": {"a": {"b": 1}}},
        )

        # A field name that is a prefix of another name must not match it.
        check_nested(
            {"name": "anna", "name_of_cat": "pushkin"},
            ["name"],
            {"name_of_cat": "pushkin"},
        )

        # Update operations ($set with dotted keys).
        check_nested(
            {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}},
            ["a", "c"],
            {"$set": {"b": 2}},
            op="u",
        )

        check_nested(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}},
            ["a.b.c", "a.e"],
            {"$set": {"a.b": {"d": 1}, "a.f": 2}},
            op="u",
        )

        check_nested(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}},
            ["a.b.c", "a.b.d", "a.e"],
            {"$set": {"a.b": {}}},
            op="u",
        )

        check_nested(
            {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}},
            ["a.b"],
            {"$set": {"b": 3}},
            op="u",
        )

        check_nested(
            {"$set": {"a.b.c": 42, "d.e.f": 123, "g": 456}},
            ["a.b", "d"],
            {"$set": {"g": 456}},
            op="u",
        )

        # Everything in the update is excluded: the whole entry is dropped.
        check_nested(
            {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}},
            ["a.b", "a.e"],
            None,
            op="u",
        )
class TestFilterFields(unittest.TestCase):
    """Tests for include/exclude field filtering done by OplogThread.

    A single-node replica set is shared by all tests in the class; each test
    runs against a newly constructed OplogThread.
    """

    # NOTE(review): this file contains more than one class named
    # ``TestFilterFields``; a later definition rebinds the name, so confirm
    # which copy is actually meant to run.

    @classmethod
    def setUpClass(cls):
        """Start the replica set and open the shared client."""
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        """Drop the ``test`` database, close the client, stop the replica set."""
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        """Create an OplogThread with a single DocManager target."""
        self.namespace_config = NamespaceConfig()
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=self.namespace_config
        )

    def tearDown(self):
        """Join the OplogThread if the test started it."""
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        """Replace the thread's namespace config with one including ``fields``."""
        self.opman.namespace_config = NamespaceConfig(include_fields=fields)

    def reset_exclude_fields(self, fields):
        """Replace the thread's namespace config with one excluding ``fields``."""
        self.opman.namespace_config = NamespaceConfig(exclude_fields=fields)

    def test_filter_fields(self):
        """dump_collection() keeps included fields and drops the others."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        # Fields present in the source document that must not be replicated.
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.reset_include_fields(include_fields)
        # Documents have more than just these fields
        doc = {
            "a": 1, "b": 2, "c": 3,
            "d": 4, "e": 5, "f": 6,
            "_id": 1
        }
        collection = conn['test']['test']
        collection.insert_one(doc)
        assert_soon(lambda: collection.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        """filter_oplog_entry() with ``exclude_fields`` on insert/update ops."""

        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {
                "op": "i",
                "o": {"_id": 0, "a": 1, "b": 2, "c": 3}
            }

        def update_op():
            return {
                "op": "u",
                "o": {
                    "$set": {"a": 4, "b": 5},
                    "$unset": {"c": True}
                },
                "o2": {"_id": 1}
            }

        def filter_doc(document, fields):
            # "_id" is never passed as an excluded field. Build a new list
            # instead of mutating the caller's.
            if fields and '_id' in fields:
                fields = [name for name in fields if name != '_id']
            return self.opman.filter_oplog_entry(
                document, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['c'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['a', 'b', 'c'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['b'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['a', 'b', 'c'])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['d', 'e', 'f'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_filter_oplog_entry(self):
        """filter_oplog_entry() with ``include_fields`` on insert/update ops."""

        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {
                "op": "i",
                "o": {"_id": 0, "a": 1, "b": 2, "c": 3}
            }

        def update_op():
            return {
                "op": "u",
                "o": {
                    "$set": {"a": 4, "b": 5},
                    "$unset": {"c": True}
                },
                "o2": {"_id": 1}
            }

        def filter_doc(document, fields):
            # "_id" is always added to a non-empty include list. Build a new
            # list instead of mutating the caller's.
            if fields and '_id' not in fields:
                fields = fields + ['_id']
            return self.opman.filter_oplog_entry(
                document, include_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['a', 'b'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['d', 'e', 'f'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['a', 'c'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['d', 'e', 'f'])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['a', 'b', 'c'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_nested_fields(self):
        """filter_oplog_entry() with dotted (nested) ``include_fields``."""

        def check_nested(document, fields, filtered_document, op='i'):
            # Wrap ``document`` in an oplog entry, filter it, and compare the
            # resulting 'o' payload (or None) with ``filtered_document``.
            if '_id' not in fields:
                fields = fields + ['_id']
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, include_fields=fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        # Factories return brand-new nested dicts on every call. The entry is
        # filtered in place, so the expected value must not share any nested
        # dict with the input (a shallow ``dict.copy()`` would, and could hide
        # an unwanted nested mutation).
        def dashed_doc():
            return {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}

        def crossed_doc():
            return {'a': {'b': 1}, 'b': {'a': 1}}

        # Included path missing from the document: its branch is dropped.
        check_nested({'name': 'Han Solo', 'a': {'b': {}}},
                     ['name', 'a.b.c'],
                     {'name': 'Han Solo'})

        # Only the listed nested paths survive.
        check_nested({'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                      'b': 2,
                      'c': {'g': 1}},
                     ['a.b.c', 'a.e'],
                     {'a': {'b': {'c': 2}, 'e': 5}})

        # Same as above, but the document carries an _id, which is kept.
        check_nested({'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                      'b': 2,
                      'c': {'g': 1},
                      '_id': 1},
                     ['a.b.c', 'a.e'],
                     {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1})

        # Field lists that cover the whole document leave it unchanged.
        check_nested(dashed_doc(), ['a.b', '-a'], dashed_doc())
        check_nested(dashed_doc(), ['a', '-a.-b'], dashed_doc())
        check_nested(dict(dashed_doc(), _id=1),
                     ['a.b', '-a'],
                     dict(dashed_doc(), _id=1))
        check_nested(dict(dashed_doc(), _id=1),
                     ['a', '-a.-b'],
                     dict(dashed_doc(), _id=1))

        # Nothing matches: the document becomes empty.
        check_nested({'test': 1}, ['doesnt_exist'], {})

        check_nested(crossed_doc(), ['a.b', 'b.a'], crossed_doc())

        # 'a.b' matches only from the top level, not inside 'c'.
        check_nested({'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
                     ['a.b'],
                     {'a': {'b': {'a': {'b': 1}}}})

        # A field name that is a prefix of another name must not match it.
        check_nested({'name': 'anna', 'name_of_cat': 'pushkin'},
                     ['name'],
                     {'name': 'anna'})

        # Update operations ($set with dotted keys).
        check_nested({'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}},
                     ['a', 'c'],
                     {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}},
                     op='u')

        check_nested({'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}},
                     ['a.b.c', 'a.e'],
                     {'$set': {'a.b': {'c': 3}, 'a.e': 1}},
                     op='u')

        check_nested({'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}},
                     ['a.b'],
                     {'$set': {'a.b.1': 1, 'a.b.2': 2}},
                     op='u')

        # Nothing in the update matches: the whole entry is dropped.
        check_nested({'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
                     ['a.b.e'],
                     None,
                     op='u')

    def test_nested_exclude_fields(self):
        """filter_oplog_entry() with dotted (nested) ``exclude_fields``."""

        def check_nested(document, exclude_fields, filtered_document, op='i'):
            # Wrap ``document`` in an oplog entry, filter it, and compare the
            # resulting 'o' payload (or None) with ``filtered_document``.
            if '_id' in exclude_fields:
                exclude_fields = [
                    name for name in exclude_fields if name != '_id']
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, exclude_fields=exclude_fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        def dashed_doc():
            # New nested dicts on every call; the entry is filtered in place.
            return {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}

        check_nested({'a': {'b': {'c': {'d': 0, 'e': 1}}}},
                     ['a.b.c.d'],
                     {'a': {'b': {'c': {'e': 1}}}})

        # Removing the only key of a sub-document leaves an empty dict behind.
        check_nested({'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}},
                     ['a.b.c.d.e.f'],
                     {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}})

        check_nested({'a': 1}, ['a'], {})

        check_nested({'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                      'b': 2,
                      'c': {'g': 1}},
                     ['a.b.c', 'a.e'],
                     {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}})

        # '_id' in the exclude list is ignored: the _id stays.
        check_nested({'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                      'b': 2,
                      'c': {'g': 1},
                      '_id': 1},
                     ['a.b.c', 'a.e', '_id'],
                     {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}, '_id': 1})

        check_nested(dashed_doc(), ['a.b', '-a'], {'a': {}})
        check_nested(dashed_doc(), ['a', '-a.-b'], {'-a': {}})
        check_nested(dict(dashed_doc(), _id=1),
                     ['a.b', '-a'],
                     {'_id': 1, 'a': {}})

        # Excluding paths that do not exist changes nothing.
        check_nested({'test': 1}, ['doesnt_exist'], {'test': 1})
        check_nested({'test': 1}, ['test.doesnt_exist'], {'test': 1})

        check_nested({'a': {'b': 1}, 'b': {'a': 1}},
                     ['a.b', 'b.a'],
                     {'a': {}, 'b': {}})

        # 'a.b' matches only from the top level, not inside 'c'.
        check_nested({'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
                     ['a.b'],
                     {'a': {}, 'c': {'a': {'b': 1}}})

        # A field name that is a prefix of another name must not match it.
        check_nested({'name': 'anna', 'name_of_cat': 'pushkin'},
                     ['name'],
                     {'name_of_cat': 'pushkin'})

        # Update operations ($set with dotted keys).
        check_nested({'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}},
                     ['a', 'c'],
                     {'$set': {'b': 2}},
                     op='u')

        check_nested({'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}},
                     ['a.b.c', 'a.e'],
                     {'$set': {'a.b': {'d': 1}, 'a.f': 2}},
                     op='u')

        check_nested({'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
                     ['a.b.c', 'a.b.d', 'a.e'],
                     {'$set': {'a.b': {}}},
                     op='u')

        check_nested({'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}},
                     ['a.b'],
                     {'$set': {'b': 3}},
                     op='u')

        check_nested({'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}},
                     ['a.b', 'd'],
                     {'$set': {'g': 456}},
                     op='u')

        # Everything in the update is excluded: the whole entry is dropped.
        check_nested({'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
                     ['a.b', 'a.e'],
                     None,
                     op='u')
class TestOplogManager(unittest.TestCase): """Defines all the testing methods, as well as a method that sets up the cluster """ def setUp(self): _, _, self.primary_p = start_replica_set('test-oplog-manager') self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p) self.oplog_coll = self.primary_conn.local['oplog.rs'] self.opman = OplogThread(primary_conn=self.primary_conn, main_address='%s:%d' % (mongo_host, self.primary_p), oplog_coll=self.oplog_coll, is_sharded=False, doc_manager=DocManager(), oplog_progress_dict=LockingDict(), namespace_set=None, auth_key=None, auth_username=None, repl_set='test-oplog-manager') def tearDown(self): try: self.opman.join() except RuntimeError: pass # OplogThread may not have been started self.primary_conn.close() kill_replica_set('test-oplog-manager') def test_get_oplog_cursor(self): '''Test the get_oplog_cursor method''' # timestamp is None - all oplog entries are returned. cursor = self.opman.get_oplog_cursor(None) self.assertEqual(cursor.count(), self.primary_conn["local"]["oplog.rs"].count()) # earliest entry is the only one at/after timestamp doc = {"ts": bson.Timestamp(1000, 0), "i": 1} self.primary_conn["test"]["test"].insert(doc) latest_timestamp = self.opman.get_last_oplog_timestamp() cursor = self.opman.get_oplog_cursor(latest_timestamp) self.assertNotEqual(cursor, None) self.assertEqual(cursor.count(), 1) next_entry_id = next(cursor)['o']['_id'] retrieved = self.primary_conn.test.test.find_one(next_entry_id) self.assertEqual(retrieved, doc) # many entries before and after timestamp self.primary_conn["test"]["test"].insert({"i": i} for i in range(2, 1002)) oplog_cursor = self.oplog_coll.find(sort=[("ts", pymongo.ASCENDING)]) # startup + insert + 1000 inserts self.assertEqual(oplog_cursor.count(), 2 + 1000) pivot = oplog_cursor.skip(400).limit(1)[0] goc_cursor = self.opman.get_oplog_cursor(pivot["ts"]) self.assertEqual(goc_cursor.count(), 2 + 1000 - 400) def test_get_last_oplog_timestamp(self): """Test the 
get_last_oplog_timestamp method""" # "empty" the oplog self.opman.oplog = self.primary_conn["test"]["emptycollection"] self.assertEqual(self.opman.get_last_oplog_timestamp(), None) # Test non-empty oplog self.opman.oplog = self.primary_conn["local"]["oplog.rs"] for i in range(1000): self.primary_conn["test"]["test"].insert({"i": i + 500}) oplog = self.primary_conn["local"]["oplog.rs"] oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(1)[0] self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"]) def test_dump_collection(self): """Test the dump_collection method Cases: 1. empty oplog 2. non-empty oplog """ # Test with empty oplog self.opman.oplog = self.primary_conn["test"]["emptycollection"] last_ts = self.opman.dump_collection() self.assertEqual(last_ts, None) # Test with non-empty oplog self.opman.oplog = self.primary_conn["local"]["oplog.rs"] for i in range(1000): self.primary_conn["test"]["test"].insert({"i": i + 500}) last_ts = self.opman.get_last_oplog_timestamp() self.assertEqual(last_ts, self.opman.dump_collection()) self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000) def test_dump_collection_with_error(self): """Test the dump_collection method with invalid documents. Cases: 1. non-empty oplog, continue_on_error=True, invalid documents """ # non-empty oplog, continue_on_error=True, invalid documents self.opman.continue_on_error = True self.opman.oplog = self.primary_conn["local"]["oplog.rs"] docs = [{'a': i} for i in range(100)] for i in range(50, 60): docs[i]['_upsert_exception'] = True self.primary_conn['test']['test'].insert(docs) last_ts = self.opman.get_last_oplog_timestamp() self.assertEqual(last_ts, self.opman.dump_collection()) docs = self.opman.doc_managers[0]._search() docs.sort() self.assertEqual(len(docs), 90) for doc, correct_a in zip(docs, range(0, 50) + range(60, 100)): self.assertEquals(doc['a'], correct_a) def test_init_cursor(self): """Test the init_cursor method Cases: 1. 
no last checkpoint, no collection dump 2. no last checkpoint, collection dump ok and stuff to dump 3. no last checkpoint, nothing to dump, stuff in oplog 4. no last checkpoint, nothing to dump, nothing in oplog 5. no last checkpoint, no collection dump, stuff in oplog 6. last checkpoint exists 7. last checkpoint is behind """ # N.B. these sub-cases build off of each other and cannot be re-ordered # without side-effects # No last checkpoint, no collection dump, nothing in oplog # "change oplog collection" to put nothing in oplog self.opman.oplog = self.primary_conn["test"]["emptycollection"] self.opman.collection_dump = False self.assertTrue( all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0])) self.assertEqual(self.opman.checkpoint, None) # No last checkpoint, empty collections, nothing in oplog self.opman.collection_dump = True cursor, cursor_len = self.opman.init_cursor() self.assertEqual(cursor, None) self.assertEqual(cursor_len, 0) self.assertEqual(self.opman.checkpoint, None) # No last checkpoint, empty collections, something in oplog self.opman.oplog = self.primary_conn['local']['oplog.rs'] collection = self.primary_conn["test"]["test"] collection.insert({"i": 1}) collection.remove({"i": 1}) time.sleep(3) last_ts = self.opman.get_last_oplog_timestamp() cursor, cursor_len = self.opman.init_cursor() self.assertEqual(cursor_len, 0) self.assertEqual(self.opman.checkpoint, last_ts) with self.opman.oplog_progress as prog: self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts) # No last checkpoint, no collection dump, something in oplog self.opman.oplog_progress = LockingDict() self.opman.collection_dump = False collection.insert({"i": 2}) last_ts = self.opman.get_last_oplog_timestamp() cursor, cursor_len = self.opman.init_cursor() for i in range(cursor_len - 1): next(cursor) self.assertEqual(next(cursor)['o']['i'], 2) self.assertEqual(self.opman.checkpoint, last_ts) # Last checkpoint exists progress = LockingDict() self.opman.oplog_progress = 
progress for i in range(1000): collection.insert({"i": i + 500}) entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=2)) progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"] self.opman.oplog_progress = progress self.opman.checkpoint = None cursor, cursor_len = self.opman.init_cursor() self.assertEqual(next(cursor)["ts"], entry[1]["ts"]) self.assertEqual(self.opman.checkpoint, entry[0]["ts"]) with self.opman.oplog_progress as prog: self.assertEqual(prog.get_dict()[str(self.opman.oplog)], entry[0]["ts"]) # Last checkpoint is behind progress = LockingDict() progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0) self.opman.oplog_progress = progress self.opman.checkpoint = None cursor, cursor_len = self.opman.init_cursor() self.assertEqual(cursor_len, 0) self.assertEqual(cursor, None) self.assertIsNotNone(self.opman.checkpoint) def test_filter_fields(self): docman = self.opman.doc_managers[0] conn = self.opman.main_connection include_fields = ["a", "b", "c"] exclude_fields = ["d", "e", "f"] # Set fields to care about self.opman.fields = include_fields # Documents have more than just these fields doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1} db = conn['test']['test'] db.insert(doc) assert_soon(lambda: db.count() == 1) self.opman.dump_collection() result = docman._search()[0] keys = result.keys() for inc, exc in zip(include_fields, exclude_fields): self.assertIn(inc, keys) self.assertNotIn(exc, keys) def test_namespace_mapping(self): """Test mapping of namespaces Cases: upsert/delete/update of documents: 1. in namespace set, mapping provided 2. 
outside of namespace set, mapping provided """ source_ns = ["test.test1", "test.test2"] phony_ns = ["test.phony1", "test.phony2"] dest_mapping = { "test.test1": "test.test1_dest", "test.test2": "test.test2_dest" } self.opman.dest_mapping = dest_mapping self.opman.namespace_set = source_ns docman = self.opman.doc_managers[0] # start replicating self.opman.start() base_doc = {"_id": 1, "name": "superman"} # doc in namespace set for ns in source_ns: db, coll = ns.split(".", 1) # test insert self.primary_conn[db][coll].insert(base_doc) assert_soon(lambda: len(docman._search()) == 1) self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns]) bad = [d for d in docman._search() if d["ns"] == ns] self.assertEqual(len(bad), 0) # test update self.primary_conn[db][coll].update( {"_id": 1}, {"$set": { "weakness": "kryptonite" }}) def update_complete(): docs = docman._search() for d in docs: if d.get("weakness") == "kryptonite": return True return False assert_soon(update_complete) self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns]) bad = [d for d in docman._search() if d["ns"] == ns] self.assertEqual(len(bad), 0) # test delete self.primary_conn[db][coll].remove({"_id": 1}) assert_soon(lambda: len(docman._search()) == 0) bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]] self.assertEqual(len(bad), 0) # cleanup self.primary_conn[db][coll].remove() self.opman.doc_managers[0]._delete() # doc not in namespace set for ns in phony_ns: db, coll = ns.split(".", 1) # test insert self.primary_conn[db][coll].insert(base_doc) time.sleep(1) self.assertEqual(len(docman._search()), 0) # test update self.primary_conn[db][coll].update( {"_id": 1}, {"$set": { "weakness": "kryptonite" }}) time.sleep(1) self.assertEqual(len(docman._search()), 0) def test_many_targets(self): """Test that one OplogThread is capable of replicating to more than one target. 
""" doc_managers = [DocManager(), DocManager(), DocManager()] self.opman.doc_managers = doc_managers # start replicating self.opman.start() self.primary_conn["test"]["test"].insert({ "name": "kermit", "color": "green" }) self.primary_conn["test"]["test"].insert({ "name": "elmo", "color": "firetruck red" }) assert_soon( lambda: sum(len(d._search()) for d in doc_managers) == 6, "OplogThread should be able to replicate to multiple targets") self.primary_conn["test"]["test"].remove({"name": "elmo"}) assert_soon( lambda: sum(len(d._search()) for d in doc_managers) == 3, "OplogThread should be able to replicate to multiple targets") for d in doc_managers: self.assertEqual(d._search()[0]["name"], "kermit") def test_filter_oplog_entry(self): # Test oplog entries: these are callables, since # filter_oplog_entry modifies the oplog entry in-place insert_op = lambda: { "op": "i", "o": { "_id": 0, "a": 1, "b": 2, "c": 3 } } update_op = lambda: { "op": "u", "o": { "$set": { "a": 4, "b": 5 }, "$unset": { "c": True } }, "o2": { "_id": 1 } } # Case 0: insert op, no fields provided self.opman.fields = None filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered, insert_op()) # Case 1: insert op, fields provided self.opman.fields = ['a', 'b'] filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2}) # Case 2: insert op, fields provided, doc becomes empty except for _id self.opman.fields = ['d', 'e', 'f'] filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered['o'], {'_id': 0}) # Case 3: update op, no fields provided self.opman.fields = None filtered = self.opman.filter_oplog_entry(update_op()) self.assertEqual(filtered, update_op()) # Case 4: update op, fields provided self.opman.fields = ['a', 'c'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('b', filtered['o']['$set']) self.assertIn('a', filtered['o']['$set']) self.assertEqual(filtered['o']['$unset'], 
update_op()['o']['$unset']) # Case 5: update op, fields provided, empty $set self.opman.fields = ['c'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('$set', filtered['o']) self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset']) # Case 6: update op, fields provided, empty $unset self.opman.fields = ['a', 'b'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('$unset', filtered['o']) self.assertEqual(filtered['o']['$set'], update_op()['o']['$set']) # Case 7: update op, fields provided, entry is nullified self.opman.fields = ['d', 'e', 'f'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertEqual(filtered, None)
class TestOplogManager(unittest.TestCase):
    """Integration tests for OplogThread running against one replica set.

    ``setUp`` starts the "test-oplog-manager" replica set and builds an
    OplogThread (``self.opman``) without starting it; tests that need live
    replication call ``self.opman.start()`` themselves. ``tearDown`` joins
    the thread, closes the client and kills the replica set.

    NOTE(review): these tests call the legacy pymongo collection helpers
    (insert/update/remove/count) -- confirm the pinned pymongo version
    before migrating them to the *_one/*_many API.
    """

    def setUp(self):
        """Start the replica set and create an un-started OplogThread."""
        _, _, self.primary_p = start_replica_set("test-oplog-manager")
        self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
        self.oplog_coll = self.primary_conn.local["oplog.rs"]
        self.opman = OplogThread(
            primary_conn=self.primary_conn,
            main_address="%s:%d" % (mongo_host, self.primary_p),
            oplog_coll=self.oplog_coll,
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=None,
            auth_key=None,
            auth_username=None,
            repl_set="test-oplog-manager",
        )

    def tearDown(self):
        """Join the OplogThread, close the client, kill the replica set."""
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass
        self.primary_conn.close()
        kill_replica_set("test-oplog-manager")

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""
        oplog = self.primary_conn["local"]["oplog.rs"]
        test_coll = self.primary_conn["test"]["test"]

        # timestamp is None - all oplog entries are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(cursor.count(), oplog.count())

        # latest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        test_coll.insert(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertIsNotNone(cursor)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)["o"]["_id"]
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        test_coll.insert({"i": i} for i in range(2, 1002))
        oplog_cursor = self.oplog_coll.find(sort=[("ts", pymongo.ASCENDING)])

        # startup + insert + 1000 inserts
        self.assertEqual(oplog_cursor.count(), 2 + 1000)
        pivot = oplog_cursor.skip(400).limit(1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 2 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""
        # "empty" the oplog by pointing the thread at an empty collection
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertIsNone(self.opman.get_last_oplog_timestamp())

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        newest = oplog.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), newest["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """
        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertIsNone(last_ts)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """
        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{"a": i} for i in range(100)]
        # Documents 50..59 are flagged so they are expected to be missing
        # from the dump (see the 90-document assertion below).
        for i in range(50, 60):
            docs[i]["_upsert_exception"] = True
        self.primary_conn["test"]["test"].insert(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())

        docs = self.opman.doc_managers[0]._search()
        # Dicts are not orderable on Python 3; sort explicitly on the field
        # that is compared below.
        docs.sort(key=lambda doc: doc["a"])

        self.assertEqual(len(docs), 90)
        # range objects cannot be concatenated with "+" on Python 3.
        expected_a = list(range(0, 50)) + list(range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc["a"], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Sub-cases, in execution order:

        1. no last checkpoint, no collection dump, nothing in oplog
        2. no last checkpoint, empty collections, nothing in oplog
        3. no last checkpoint, empty collections, something in oplog
        4. no last checkpoint, no collection dump, something in oplog
        5. last checkpoint exists
        6. last checkpoint is behind
        """
        # N.B. these sub-cases build off of each other and cannot be
        # re-ordered without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(
            all(doc["op"] == "n" for doc in self.opman.init_cursor()[0])
        )
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_len = self.opman.init_cursor()
        self.assertIsNone(cursor)
        self.assertEqual(cursor_len, 0)
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        collection = self.primary_conn["test"]["test"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        # Skip ahead so the next entry read is the last one in the cursor.
        for _ in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry = list(
            self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=2)
        )
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(
                prog.get_dict()[str(self.opman.oplog)], entry[0]["ts"]
            )

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertIsNone(cursor)
        self.assertIsNotNone(self.opman.checkpoint)

    def test_filter_fields(self):
        """Test that dump_collection keeps only the configured fields."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.main_connection

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        coll = conn["test"]["test"]
        coll.insert(doc)
        assert_soon(lambda: coll.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_namespace_mapping(self):
        """Test mapping of namespaces

        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """
        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {
            "test.test1": "test.test1_dest",
            "test.test2": "test.test2_dest",
        }
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]

        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        def update_complete():
            """Return True once the "weakness" update has been replicated."""
            return any(
                d.get("weakness") == "kryptonite" for d in docman._search()
            )

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1}, {"$set": {"weakness": "kryptonite"}}
            )

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].remove({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].remove()
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1}, {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert(
            {"name": "kermit", "color": "green"}
        )
        self.primary_conn["test"]["test"].insert(
            {"name": "elmo", "color": "firetruck red"}
        )

        # Two documents replicated to each of the three targets.
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets",
        )

        self.primary_conn["test"]["test"].remove({"name": "elmo"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets",
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_filter_oplog_entry(self):
        """Test filter_oplog_entry on insert and update oplog entries."""

        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        # Case 0: insert op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        self.opman.fields = ["a", "b"]
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.fields = ["d", "e", "f"]
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        self.opman.fields = ["a", "c"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        self.opman.fields = ["c"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        self.opman.fields = ["a", "b"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        self.opman.fields = ["d", "e", "f"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertIsNone(filtered)
class TestFilterFields(unittest.TestCase): @classmethod def setUpClass(cls): cls.repl_set = ReplicaSetSingle().start() cls.primary_conn = cls.repl_set.client() cls.oplog_coll = cls.primary_conn.local['oplog.rs'] @classmethod def tearDownClass(cls): cls.primary_conn.drop_database("test") close_client(cls.primary_conn) cls.repl_set.stop() def setUp(self): self.dest_mapping_stru = DestMapping([], [], {}) self.opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru ) def tearDown(self): try: self.opman.join() except RuntimeError: # OplogThread may not have been started pass def _check_fields(self, opman, fields, exclude_fields, projection): if fields: self.assertEqual(sorted(opman.fields), sorted(fields)) self.assertEqual(opman._fields, set(fields)) else: self.assertEqual(opman.fields, None) self.assertEqual(opman._fields, set([])) if exclude_fields: self.assertEqual(sorted(opman.exclude_fields), sorted(exclude_fields)) self.assertEqual(opman._exclude_fields, set(exclude_fields)) else: self.assertEqual(opman.exclude_fields, None) self.assertEqual(opman._exclude_fields, set([])) self.assertEqual(opman._projection, projection) def test_filter_fields(self): docman = self.opman.doc_managers[0] conn = self.opman.primary_client include_fields = ["a", "b", "c"] exclude_fields = ["d", "e", "f"] # Set fields to care about self.opman.fields = include_fields # Documents have more than just these fields doc = { "a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1 } db = conn['test']['test'] db.insert_one(doc) assert_soon(lambda: db.count() == 1) self.opman.dump_collection() result = docman._search()[0] keys = result.keys() for inc, exc in zip(include_fields, exclude_fields): self.assertIn(inc, keys) self.assertNotIn(exc, keys) def test_filter_exclude_oplog_entry(self): # Test oplog entries: these are callables, since # filter_oplog_entry modifies the oplog entry in-place 
insert_op = lambda: { "op": "i", "o": { "_id": 0, "a": 1, "b": 2, "c": 3 } } update_op = lambda: { "op": "u", "o": { "$set": { "a": 4, "b": 5 }, "$unset": { "c": True } }, "o2": { "_id": 1 } } # Case 0: insert op, no fields provided self.opman.exclude_fields = None filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered, insert_op()) self.assertEqual(None, self.opman._projection) # Case 1: insert op, fields provided self.opman.exclude_fields = ['c'] filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2}) self.assertEqual({'c': 0}, self.opman._projection) # Case 2: insert op, fields provided, doc becomes empty except for _id self.opman.exclude_fields = ['a', 'b', 'c'] filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered['o'], {'_id': 0}) self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection) # Case 3: update op, no fields provided self.opman.exclude_fields = None filtered = self.opman.filter_oplog_entry(update_op()) self.assertEqual(filtered, update_op()) self.assertEqual(None, self.opman._projection) # Case 4: update op, fields provided self.opman.exclude_fields = ['b'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('b', filtered['o']['$set']) self.assertIn('a', filtered['o']['$set']) self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset']) self.assertEqual({'b': 0}, self.opman._projection) # Case 5: update op, fields provided, empty $set self.opman.exclude_fields = ['a', 'b'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('$set', filtered['o']) self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset']) self.assertEqual({'a': 0, 'b': 0}, self.opman._projection) # Case 6: update op, fields provided, empty $unset self.opman.exclude_fields = ['c'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('$unset', filtered['o']) 
self.assertEqual(filtered['o']['$set'], update_op()['o']['$set']) self.assertEqual({'c': 0}, self.opman._projection) # Case 7: update op, fields provided, entry is nullified self.opman.exclude_fields = ['a', 'b', 'c'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertEqual(filtered, None) self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection) # Case 8: update op, fields provided, replacement self.opman.exclude_fields = ['d', 'e', 'f'] filtered = self.opman.filter_oplog_entry({ 'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4} }) self.assertEqual( filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}}) self.assertEqual({'d': 0, 'e': 0, 'f': 0}, self.opman._projection) def test_filter_oplog_entry(self): # Test oplog entries: these are callables, since # filter_oplog_entry modifies the oplog entry in-place insert_op = lambda: { "op": "i", "o": { "_id": 0, "a": 1, "b": 2, "c": 3 } } update_op = lambda: { "op": "u", "o": { "$set": { "a": 4, "b": 5 }, "$unset": { "c": True } }, "o2": { "_id": 1 } } # Case 0: insert op, no fields provided self.opman.fields = None filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered, insert_op()) self.assertEqual(None, self.opman._projection) # Case 1: insert op, fields provided self.opman.fields = ['a', 'b'] filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2}) self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection) # Case 2: insert op, fields provided, doc becomes empty except for _id self.opman.fields = ['d', 'e', 'f'] filtered = self.opman.filter_oplog_entry(insert_op()) self.assertEqual(filtered['o'], {'_id': 0}) self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1}, self.opman._projection) # Case 3: update op, no fields provided self.opman.fields = None filtered = self.opman.filter_oplog_entry(update_op()) self.assertEqual(filtered, update_op()) self.assertEqual(None, self.opman._projection) # Case 4: update op, 
fields provided self.opman.fields = ['a', 'c'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('b', filtered['o']['$set']) self.assertIn('a', filtered['o']['$set']) self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset']) self.assertEqual({'_id': 1, 'a': 1, 'c': 1}, self.opman._projection) # Case 5: update op, fields provided, empty $set self.opman.fields = ['c'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('$set', filtered['o']) self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset']) self.assertEqual({'_id': 1, 'c': 1}, self.opman._projection) # Case 6: update op, fields provided, empty $unset self.opman.fields = ['a', 'b'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertNotIn('$unset', filtered['o']) self.assertEqual(filtered['o']['$set'], update_op()['o']['$set']) self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection) # Case 7: update op, fields provided, entry is nullified self.opman.fields = ['d', 'e', 'f'] filtered = self.opman.filter_oplog_entry(update_op()) self.assertEqual(filtered, None) self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1}, self.opman._projection) # Case 8: update op, fields provided, replacement self.opman.fields = ['a', 'b', 'c'] filtered = self.opman.filter_oplog_entry({ 'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4} }) self.assertEqual( filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}}) self.assertEqual({'_id': 1, 'a': 1, 'b': 1, 'c': 1}, self.opman._projection) def test_exclude_fields_constructor(self): # Test with the "_id" field in exclude_fields exclude_fields = ["_id", "title", "content", "author"] opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru, exclude_fields=exclude_fields ) exclude_fields.remove('_id') self._check_fields(opman, [], exclude_fields, dict((f, 0) for f in exclude_fields)) extra_fields = 
exclude_fields + ['extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered) # Test without "_id" field included in exclude_fields exclude_fields = ["title", "content", "author"] opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru, exclude_fields=exclude_fields ) self._check_fields(opman, [], exclude_fields, dict((f, 0) for f in exclude_fields)) extra_fields = extra_fields + ['extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual({'extra1': 1, 'extra2': 1}, filtered) # Test with only "_id" field in exclude_fields exclude_fields = ["_id"] opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru, exclude_fields=exclude_fields ) self._check_fields(opman, [], [], None) extra_fields = exclude_fields + ['extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in extra_fields), filtered) # Test with nothing set for exclude_fields opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru, exclude_fields=None ) self._check_fields(opman, [], [], None) extra_fields = ['_id', 'extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in extra_fields), filtered) def test_fields_constructor(self): # Test with "_id" field in constructor fields = ["_id", "title", "content", "author"] opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), 
oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru, fields=fields ) self._check_fields(opman, fields, [], dict((f, 1) for f in fields)) extra_fields = fields + ['extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in fields), filtered) # Test without "_id" field in constructor fields = ["title", "content", "author"] opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru, fields=fields ) fields.append('_id') self._check_fields(opman, fields, [], dict((f, 1) for f in fields)) extra_fields = fields + ['extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in fields), filtered) # Test with only "_id" field fields = ["_id"] opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru, fields=fields ) self._check_fields(opman, fields, [], dict((f, 1) for f in fields)) extra_fields = fields + ['extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual({'_id': 1}, filtered) # Test with no fields set opman = OplogThread( primary_client=self.primary_conn, doc_managers=(DocManager(),), oplog_progress_dict=LockingDict(), dest_mapping_stru=self.dest_mapping_stru ) self._check_fields(opman, [], [], None) extra_fields = ['_id', 'extra1', 'extra2'] filtered = opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in extra_fields), filtered) def test_exclude_fields_attr(self): # Test with the "_id" field in exclude_fields. 
exclude_fields = ["_id", "title", "content", "author"] exclude_fields.remove('_id') self.opman.exclude_fields = exclude_fields self._check_fields(self.opman, [], exclude_fields, dict((f, 0) for f in exclude_fields)) extra_fields = exclude_fields + ['extra1', 'extra2'] filtered = self.opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered) # Test without "_id" field included in exclude_fields exclude_fields = ["title", "content", "author"] self.opman.exclude_fields = exclude_fields self._check_fields(self.opman, [], exclude_fields, dict((f, 0) for f in exclude_fields)) extra_fields = extra_fields + ['extra1', 'extra2'] filtered = self.opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual({'extra1': 1, 'extra2': 1}, filtered) # Test with only "_id" field in exclude_fields exclude_fields = ["_id"] self.opman.exclude_fields = exclude_fields self._check_fields(self.opman, [], [], None) extra_fields = exclude_fields + ['extra1', 'extra2'] filtered = self.opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in extra_fields), filtered) # Test with nothing set for exclude_fields self.opman.exclude_fields = None self._check_fields(self.opman, [], [], None) extra_fields = ['_id', 'extra1', 'extra2'] filtered = self.opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in extra_fields), filtered) def test_fields_attr(self): # Test with "_id" field included in fields fields = ["_id", "title", "content", "author"] self.opman.fields = fields self._check_fields(self.opman, fields, [], dict((f, 1) for f in fields)) extra_fields = fields + ['extra1', 'extra2'] filtered = self.opman.filter_oplog_entry( {'op': 'i', 'o': dict((f, 1) for f in extra_fields)})['o'] self.assertEqual(dict((f, 1) for f in fields), 
filtered)

        # Test without "_id" field included in fields
        fields = ["title", "content", "author"]
        self.opman.fields = fields
        # '_id' is added to the expectation only after the assignment: the
        # checks below expect it to be reported even though it was not
        # part of the list at the time it was assigned.
        fields.append('_id')
        self._check_fields(self.opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        self.opman.fields = None
        self._check_fields(self.opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        # With no field list the entry is expected back unchanged.
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_nested_fields(self):
        """Check include-list filtering with dotted (nested) field names.

        Covers insert-style documents (op 'i') and ``$set`` updates
        (op 'u').
        """
        def check_nested(document, fields, filtered_document, op='i'):
            """Assign ``fields`` and assert the result of filtering.

            ``document`` is wrapped in an oplog entry with operation ``op``
            and passed to ``filter_oplog_entry``.  ``filtered_document`` is
            the expected ``'o'`` payload, or ``None`` when the whole entry
            is expected to come back as ``None``.

            NOTE: ``fields`` is mutated ('_id' is appended) so that it can
            serve as the expected value of the stored field list.
            """
            self.opman.fields = fields
            fields.append('_id')
            self.assertEqual(set(fields), self.opman._fields)
            self.assertEqual(sorted(fields), sorted(self.opman.fields))
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document})
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        # Requested nested path missing from the document
        document = {'name': 'Han Solo', 'a': {'b': {}}}
        fields = ['name', 'a.b.c']
        filtered_document = {'name': 'Han Solo'}
        check_nested(document, fields, filtered_document)

        # Two nested paths below the same top-level key
        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}}
        check_nested(document, fields, filtered_document)

        # Same as above, with '_id' present in the document
        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1}
        check_nested(document, fields, filtered_document)

        # Keys starting with '-' are expected to be handled like any other
        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a', '-a.-b']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        # The next two checks deliberately reuse the same document and
        # expected-value objects.
        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        fields = ['a', '-a.-b']
        check_nested(document, fields, filtered_document)

        # Requested field that does not exist at all
        document = {'test': 1}
        fields = ['doesnt_exist']
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        fields = ['a.b', 'b.a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        # 'a.b' also occurs deeper in the document ('c.a.b'); only the
        # top-level path is expected to be kept.
        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        fields = ['a.b']
        filtered_document = {'a': {'b': {'a': {'b': 1}}}}
        check_nested(document, fields, filtered_document)

        # A field name that is a prefix of another key
        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        fields = ['name']
        filtered_document = {'name': 'anna'}
        check_nested(document, fields, filtered_document)

        # Update entries: '$set' keys are themselves dotted paths
        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        fields = ['a', 'c']
        filtered_update = {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'c': 3}, 'a.e': 1}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        fields = ['a.b']
        filtered_update = {'$set': {'a.b.1': 1, 'a.b.2': 2}}
        check_nested(update, fields, filtered_update, op='u')

        # Nothing in the update matches: the whole entry is expected to be
        # filtered out (None).
        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        fields = ['a.b.e']
        filtered_update = None
        check_nested(update, fields, filtered_update, op='u')

    def test_nested_exclude_fields(self):
        """Check exclude-list filtering with dotted (nested) field names.

        Covers insert-style documents (op 'i') and ``$set`` updates
        (op 'u').
        """
        def check_nested(document, exclude_fields, filtered_document,
                         op='i'):
            """Assign ``exclude_fields`` and assert the result of filtering.

            ``document`` is wrapped in an oplog entry with operation ``op``
            and passed to ``filter_oplog_entry``.  ``filtered_document`` is
            the expected ``'o'`` payload, or ``None`` when the whole entry
            is expected to come back as ``None``.

            NOTE: '_id' is removed from ``exclude_fields`` (in place) before
            comparing, i.e. the stored exclude list is expected not to
            contain '_id'.
            """
            self.opman.exclude_fields = exclude_fields
            if '_id' in exclude_fields:
                exclude_fields.remove('_id')
            self.assertEqual(set(exclude_fields), self.opman._exclude_fields)
            self.assertEqual(sorted(exclude_fields),
                             sorted(self.opman.exclude_fields))
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document})
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'a': {'b': {'c': {'d': 0, 'e': 1}}}}
        exclude_fields = ['a.b.c.d']
        filtered_document = {'a': {'b': {'c': {'e': 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        # Removing the only leaf leaves the (now empty) parent in place
        document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}}
        exclude_fields = ['a.b.c.d.e.f']
        filtered_document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': 1}
        exclude_fields = ['a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}}
        check_nested(document, exclude_fields, filtered_document)

        # '_id' listed for exclusion is expected to survive in the output
        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        exclude_fields = ['a.b.c', 'a.e', '_id']
        filtered_document = {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1},
                             '_id': 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a', '-a.-b']
        filtered_document = {'-a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'_id': 1, 'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        # Excluding fields that do not exist leaves the document unchanged
        document = {'test': 1}
        exclude_fields = ['doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['test.doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        exclude_fields = ['a.b', 'b.a']
        filtered_document = {'a': {}, 'b': {}}
        check_nested(document, exclude_fields, filtered_document)

        # 'a.b' also occurs deeper in the document ('c.a.b'); only the
        # top-level path is expected to be removed.
        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        exclude_fields = ['a.b']
        filtered_document = {'a': {}, 'c': {'a': {'b': 1}}}
        check_nested(document, exclude_fields, filtered_document)

        # A field name that is a prefix of another key
        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        exclude_fields = ['name']
        filtered_document = {'name_of_cat': 'pushkin'}
        check_nested(document, exclude_fields, filtered_document)

        # Update entries: '$set' keys are themselves dotted paths
        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        exclude_fields = ['a', 'c']
        filtered_update = {'$set': {'b': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'d': 1}, 'a.f': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b.c', 'a.b.d', 'a.e']
        filtered_update = {'$set': {'a.b': {}}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        exclude_fields = ['a.b']
        filtered_update = {'$set': {'b': 3}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}}
        exclude_fields = ['a.b', 'd']
        filtered_update = {'$set': {'g': 456}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        # Everything in the update is excluded: the whole entry is expected
        # to be filtered out (None).
        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b', 'a.e']
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op='u')

    def test_fields_and_exclude(self):
        """Check that ``fields`` and ``exclude_fields`` cannot be combined.

        Exercises the OplogThread constructor as well as attribute
        assignment after construction.
        """
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        # Test setting both to None in constructor
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=None,
            exclude_fields=None
        )
        self._check_fields(opman, [], [], None)

        # Only exclude_fields given in the constructor
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=None,
            exclude_fields=exclude_fields
        )
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        # Test setting fields when exclude_fields is set
        # (even assigning None is expected to raise)
        self.assertRaises(
            errors.InvalidConfiguration, setattr, opman, "fields", fields)
        self.assertRaises(
            errors.InvalidConfiguration, setattr, opman, "fields", None)

        # Only fields given in the constructor
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            exclude_fields=None,
            fields=fields
        )
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        # Test setting exclude_fields when fields is set
        # (even assigning None is expected to raise)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", exclude_fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", None)

        # Passing both lists to the constructor is expected to raise
        self.assertRaises(
            errors.InvalidConfiguration, OplogThread,
            self.primary_conn,
            (DocManager(),),
            LockingDict(),
            self.dest_mapping_stru,
            fields=fields,
            exclude_fields=exclude_fields)
class ShardedClusterTestCase(unittest.TestCase):
    """Shared fixture code for tests that run against a two-shard cluster.

    The class defines no ``setUp`` of its own; the cluster is only created
    when :meth:`set_up_sharded_cluster` is called.
    """

    def set_up_sharded_cluster(self, sharded_cluster_type):
        """Start a sharded cluster and bring it into a known state.

        Steps, in order:
            * start the cluster and connect to mongos and the first two
              shards
            * drop ``test.mcsharded`` and stop the balancer
            * index and shard ``test.mcsharded`` on ``i``, split it at
              ``i == 1000`` and move each half to its own shard
            * recreate an empty ``oplog.timestamp`` progress file
            * create one OplogThread per shard (the threads are not
              started here)

        Args:
            sharded_cluster_type: called without arguments; ``start()`` is
                invoked on the result to obtain the running cluster.
        """
        cluster = sharded_cluster_type().start()
        self.cluster = cluster

        # One client for mongos, one for each of the two shards
        self.mongos_conn = cluster.client()
        self.shard1_conn = cluster.shards[0].client()
        self.shard2_conn = cluster.shards[1].client()

        mongos = self.mongos_conn
        admin_db = mongos["admin"]
        sharded_coll = mongos["test"]["mcsharded"]

        # Start from a clean collection
        sharded_coll.drop()

        # The balancer must be off before the collection is created
        balancer_filter = {"_id": "balancer"}
        balancer_change = {"$set": {"stopped": True}}
        mongos.config.settings.update_one(
            balancer_filter, balancer_change, upsert=True)

        # Shard test.mcsharded on "i"
        sharded_coll.create_index("i")
        admin_db.command("enableSharding", "test")
        admin_db.command("shardCollection", "test.mcsharded", key={"i": 1})

        # Split so that i < 1000 belongs to shard1 and i >= 1000 to shard2
        split_command = bson.SON([
            ("split", "test.mcsharded"),
            ("middle", {"i": 1000}),
        ])
        admin_db.command(split_command)

        # Put each chunk on its shard; a failing moveChunk is ignored
        for probe_value, target_shard in ((1, 'demo-set-0'),
                                          (1000, 'demo-set-1')):
            try:
                admin_db.command(
                    "moveChunk", "test.mcsharded",
                    find={"i": probe_value}, to=target_shard)
            except pymongo.errors.OperationFailure:
                pass

        # Insert one probe document per chunk and wait until each shard
        # reports its own probe
        sharded_coll.insert_one({"i": 1})
        sharded_coll.insert_one({"i": 1000})

        def chunks_moved():
            on_shard1 = self.shard1_conn.test.mcsharded.find_one()
            on_shard2 = self.shard2_conn.test.mcsharded.find_one()
            if on_shard1 is None or on_shard2 is None:
                return False
            return on_shard1['i'] == 1 and on_shard2['i'] == 1000

        # NOTE: the failure message is built before polling starts, so it
        # shows the documents as they were at that moment.
        seen_before_wait = (self.shard1_conn.test.mcsharded.find_one(),
                            self.shard2_conn.test.mcsharded.find_one())
        assert_soon(
            chunks_moved,
            max_tries=120,
            message='chunks not moved? doc1=%r, doc2=%r' % seen_before_wait)
        sharded_coll.delete_many({})

        # Replace any existing oplog progress file with an empty one
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Both oplog managers share doc manager, progress dict and
        # namespace configuration
        shared_doc_manager = DocManager()
        shared_progress = LockingDict()
        shared_namespaces = NamespaceConfig(
            namespace_set=["test.mcsharded", "test.mcunsharded"])
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(shared_doc_manager,),
            oplog_progress_dict=shared_progress,
            namespace_config=shared_namespaces,
            mongos_client=mongos)
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(shared_doc_manager,),
            oplog_progress_dict=shared_progress,
            namespace_config=shared_namespaces,
            mongos_client=mongos)

    def tearDown(self):
        """Join both oplog threads, close all clients, stop the cluster."""
        for opman_attr in ("opman1", "opman2"):
            try:
                getattr(self, opman_attr).join()
            except RuntimeError:
                # thread may not have been started
                pass
        close_client(self.mongos_conn)
        close_client(self.shard1_conn)
        close_client(self.shard2_conn)
        self.cluster.stop()
class ShardedClusterTestCase(unittest.TestCase):
    """Common cluster set-up and tear-down for sharded OplogThread tests.

    No ``setUp`` is defined here; :meth:`set_up_sharded_cluster` has to be
    called explicitly with the cluster type to start.
    """

    def set_up_sharded_cluster(self, sharded_cluster_type):
        """Create the cluster, shard ``test.mcsharded`` and build OplogThreads.

        After this method returns:
            * ``self.cluster`` is the started cluster
            * ``self.mongos_conn``, ``self.shard1_conn`` and
              ``self.shard2_conn`` are clients for mongos and two shards
            * ``test.mcsharded`` is empty, sharded on ``i`` and split at
              ``i == 1000``
            * an empty ``oplog.timestamp`` file exists
            * ``self.opman1`` / ``self.opman2`` are OplogThreads (not yet
              started) for shard 1 / shard 2

        Args:
            sharded_cluster_type: called without arguments; ``start()`` is
                invoked on the result to obtain the running cluster.
        """
        self.cluster = sharded_cluster_type().start()
        shards = self.cluster.shards

        # Clients for mongos and for both shards
        self.mongos_conn = self.cluster.client()
        self.shard1_conn = shards[0].client()
        self.shard2_conn = shards[1].client()

        router = self.mongos_conn
        namespace = "test.mcsharded"
        test_coll = router["test"]["mcsharded"]

        def run_admin(*args, **kwargs):
            # Run an admin command through mongos
            return router["admin"].command(*args, **kwargs)

        def try_move_chunk(shard_key_value, shard_name):
            # Best effort: a failing moveChunk is deliberately ignored
            try:
                run_admin("moveChunk", namespace,
                          find={"i": shard_key_value}, to=shard_name)
            except pymongo.errors.OperationFailure:
                pass

        # Remove data left over from earlier runs
        test_coll.drop()

        # Stop the balancer before the collection is (re)created
        router.config.settings.update_one(
            {"_id": "balancer"},
            {"$set": {"stopped": True}},
            upsert=True,
        )

        # Index and shard the collection on "i"
        test_coll.create_index("i")
        run_admin("enableSharding", "test")
        run_admin("shardCollection", namespace, key={"i": 1})

        # Split point: i < 1000 -> shard1, i >= 1000 -> shard2
        run_admin(bson.SON([("split", namespace), ("middle", {"i": 1000})]))

        # Move both chunks to where they belong
        try_move_chunk(1, 'demo-set-0')
        try_move_chunk(1000, 'demo-set-1')

        # One probe document per chunk, used to verify the distribution
        for probe in ({"i": 1}, {"i": 1000}):
            test_coll.insert_one(probe)

        def chunks_moved():
            low = self.shard1_conn.test.mcsharded.find_one()
            high = self.shard2_conn.test.mcsharded.find_one()
            if low is None or high is None:
                return False
            return low['i'] == 1 and high['i'] == 1000

        # NOTE: this message is formatted before the wait begins, so it
        # reports what each shard returned at that point in time.
        failure_message = 'chunks not moved? doc1=%r, doc2=%r' % (
            self.shard1_conn.test.mcsharded.find_one(),
            self.shard2_conn.test.mcsharded.find_one())
        assert_soon(chunks_moved, max_tries=120, message=failure_message)
        test_coll.delete_many({})

        # Start with a fresh, empty oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # One oplog manager per shard, sharing doc manager and progress
        docman = DocManager()
        progress = LockingDict()
        ns_config = NamespaceConfig(
            namespace_set=["test.mcsharded", "test.mcunsharded"])
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(docman,),
            oplog_progress_dict=progress,
            namespace_config=ns_config,
            mongos_client=router,
        )
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(docman,),
            oplog_progress_dict=progress,
            namespace_config=ns_config,
            mongos_client=router,
        )

    def tearDown(self):
        """Join the oplog threads, close the clients and stop the cluster."""
        for name in ("opman1", "opman2"):
            try:
                getattr(self, name).join()
            except RuntimeError:
                # thread may not have been started
                pass
        close_client(self.mongos_conn)
        close_client(self.shard1_conn)
        close_client(self.shard2_conn)
        self.cluster.stop()
class TestOplogManagerSharded(unittest.TestCase):
    """Defines all test cases for OplogThreads running on a sharded
    cluster
    """

    def setUp(self):
        """ Initialize the cluster:

        Clean out the databases used by the tests
        Make connections to mongos, mongods
        Create and shard test collections
        Create OplogThreads
        """
        # Start the cluster with a mongos on port 27217
        self.mongos_p = start_cluster()

        # Connection to mongos
        mongos_address = '%s:%d' % (mongo_host, self.mongos_p)
        self.mongos_conn = MongoClient(mongos_address)

        # Connections to the shards
        # (ports of the primary and of the first secondary of each shard)
        shard1_ports = get_shard(self.mongos_p, 0)
        shard2_ports = get_shard(self.mongos_p, 1)
        self.shard1_prim_p = shard1_ports['primary']
        self.shard1_scnd_p = shard1_ports['secondaries'][0]
        self.shard2_prim_p = shard2_ports['primary']
        self.shard2_scnd_p = shard2_ports['secondaries'][0]
        self.shard1_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard1_prim_p),
                                       replicaSet="demo-set-0")
        self.shard2_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard2_prim_p),
                                       replicaSet="demo-set-1")
        self.shard1_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard1_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )
        self.shard2_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard2_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        self.mongos_conn.admin.command(bson.SON([
            ("split", "test.mcsharded"),
            ("middle", {"i": 1000})
        ]))

        # disable the balancer
        self.mongos_conn.config.settings.update(
            {"_id": "balancer"},
            {"$set": {"stopped": True}},
            upsert=True
        )

        # Move chunks to their proper places
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to="demo-set-0"
            )
        except pymongo.errors.OperationFailure:
            pass        # chunk may already be on the correct shard
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1000},
                to="demo-set-1"
            )
        except pymongo.errors.OperationFailure:
            pass        # chunk may already be on the correct shard

        # Make sure chunks are distributed correctly
        # (one probe document per chunk; each shard must report its own)
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            """Return True once each shard holds exactly its probe doc."""
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000
        assert_soon(chunks_moved)
        # Remove the probe documents again
        self.mongos_conn.test.mcsharded.remove()

        # create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # Oplog threads (oplog manager) for each shard
        # Both threads share one doc manager and one progress dict.
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_conn=self.shard1_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard1_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )
        self.opman2 = OplogThread(
            primary_conn=self.shard2_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard2_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )

    def tearDown(self):
        """Join both oplog threads, close all connections, call kill_all()."""
        try:
            self.opman1.join()
        except RuntimeError:
            pass                # thread may not have been started
        try:
            self.opman2.join()
        except RuntimeError:
            pass                # thread may not have been started
        self.mongos_conn.close()
        self.shard1_conn.close()
        self.shard2_conn.close()
        self.shard1_secondary_conn.close()
        self.shard2_secondary_conn.close()
        kill_all()

    def test_retrieve_doc(self):
        """ Test the retrieve_doc method """

        # Trivial case where the oplog entry is None
        self.assertEqual(self.opman1.retrieve_doc(None), None)

        # Retrieve a document from insert operation in oplog
        # (i == 1, so the entry is looked up in the shard1 oplog)
        doc = {"name": "mango", "type": "fruit",
               "ns": "test.mcsharded", "weight": 3.24, "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert(doc)
        oplog_entries = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        oplog_entry = next(oplog_entries)
        self.assertEqual(self.opman1.retrieve_doc(oplog_entry), doc)

        # Retrieve a document from update operation in oplog
        self.mongos_conn["test"]["mcsharded"].update(
            {"i": 1},
            {"$set": {"sounds-like": "mongo"}}
        )
        oplog_entries = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        # Mirror the update on the local copy used as expected value
        doc["sounds-like"] = "mongo"
        self.assertEqual(self.opman1.retrieve_doc(next(oplog_entries)), doc)

        # Retrieve a document from remove operation in oplog
        # (expected: None)
        self.mongos_conn["test"]["mcsharded"].remove({
            "i": 1
        })
        oplog_entries = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        self.assertEqual(self.opman1.retrieve_doc(next(oplog_entries)), None)

        # Retrieve a document with bad _id
        # (expected: None)
        # Reuses the insert entry fetched above with its _id overwritten.
        oplog_entry["o"]["_id"] = "ThisIsNotAnId123456789"
        self.assertEqual(self.opman1.retrieve_doc(oplog_entry), None)

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # Trivial case: timestamp is None
        self.assertEqual(self.opman1.get_oplog_cursor(None), None)

        # earliest entry is after given timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert(doc)
        self.assertEqual(self.opman1.get_oplog_cursor(
            bson.Timestamp(1, 0)), None)

        # earliest entry is the only one at/after timestamp
        latest_timestamp = self.opman1.get_last_oplog_timestamp()
        cursor = self.opman1.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        self.assertEqual(self.opman1.retrieve_doc(cursor[0]), doc)

        # many entries before and after timestamp
        for i in range(2, 2002):
            self.mongos_conn["test"]["mcsharded"].insert({
                "i": i
            })
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )

        # oplogs should have records for inserts performed, plus
        # various other messages
        oplog1_count = oplog1.count()
        oplog2_count = oplog2.count()
        self.assertGreaterEqual(oplog1_count, 998)
        self.assertGreaterEqual(oplog2_count, 1002)
        # Use the 401st entry of each oplog as the starting timestamp
        pivot1 = oplog1.skip(400).limit(1)[0]
        pivot2 = oplog2.skip(400).limit(1)[0]

        cursor1 = self.opman1.get_oplog_cursor(pivot1["ts"])
        cursor2 = self.opman2.get_oplog_cursor(pivot2["ts"])
        self.assertEqual(cursor1.count(), oplog1_count - 400)
        self.assertEqual(cursor2.count(), oplog2_count - 400)

        # get_oplog_cursor fast-forwards *one doc beyond* the given timestamp
        doc1 = self.mongos_conn["test"]["mcsharded"].find_one(
            {"_id": next(cursor1)["o"]["_id"]})
        doc2 = self.mongos_conn["test"]["mcsharded"].find_one(
            {"_id": next(cursor2)["o"]["_id"]})
        self.assertEqual(doc1["i"], self.opman1.retrieve_doc(pivot1)["i"] + 1)
        self.assertEqual(doc2["i"], self.opman2.retrieve_doc(pivot2)["i"] + 1)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        # (point both threads at a different collection instead of
        # touching the real oplog)
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(), None)
        self.assertEqual(self.opman2.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        # i runs from 500 to 1499, i.e. 500 documents on either side of
        # the split point at 1000
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({
                "i": i + 500
            })
        # Newest entry of each oplog, read directly from the shard
        oplog1 = self.shard1_conn["local"]["oplog.rs"]
        oplog1 = oplog1.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        oplog2 = self.shard2_conn["local"]["oplog.rs"]
        oplog2 = oplog2.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(),
                         oplog1["ts"])
        self.assertEqual(self.opman2.get_last_oplog_timestamp(),
                         oplog2["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        last_ts1 = self.opman1.dump_collection()
        last_ts2 = self.opman2.dump_collection()
        self.assertEqual(last_ts1, None)
        self.assertEqual(last_ts2, None)

        # Test with non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({
                "i": i + 500
            })
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(last_ts1, self.opman1.dump_collection())
        self.assertEqual(last_ts2, self.opman2.dump_collection())
        # Both threads dump into the same doc manager, hence 1000 in total
        self.assertEqual(len(self.opman1.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. last checkpoint exists
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertEqual(self.opman1.init_cursor(), None)
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertEqual(self.opman2.init_cursor(), None)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = True
        self.opman2.collection_dump = True
        self.assertEqual(self.opman1.init_cursor(), None)
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertEqual(self.opman2.init_cursor(), None)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        # Remember shard2's newest entry before writing anything; the
        # insert/remove below uses i == 1 and therefore targets shard1.
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor = self.opman2.init_cursor()
        self.assertEqual(next(cursor)["ts"], oplog_startup_ts)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, non-empty collections, stuff in oplog
        # (a fresh progress dict discards the checkpoints stored above)
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        collection.insert({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             last_ts1)
        self.assertEqual(next(self.opman2.init_cursor())["ts"], last_ts2)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        # Two consecutive oplog entries per shard: the first becomes the
        # stored checkpoint, the second is where the cursor is expected
        # to resume.
        entry1 = list(
            self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        entry2 = list(
            self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1 = self.opman1.init_cursor()
        cursor2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])

    def test_rollback(self):
        """Test the rollback method in a sharded environment

        Cases:
        1. Documents on both shards, rollback on one shard
        2. Documents on both shards, rollback on both shards

        """

        self.opman1.start()
        self.opman2.start()

        # Insert first documents while primaries are up
        # (w=2 is passed so the write is acknowledged by two members)
        db_main = self.mongos_conn["test"]["mcsharded"]
        db_main.insert({"i": 0}, w=2)
        db_main.insert({"i": 1000}, w=2)
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].count(), 1)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].count(), 1)

        # Case 1: only one primary goes down, shard1 in this case
        kill_mongo_proc(self.shard1_prim_p, destroy=False)

        # Wait for the secondary to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        retry_until_ok(db_main.insert, {"i": 1})
        db_secondary1 = self.shard1_secondary_conn["test"]["mcsharded"]
        db_secondary2 = self.shard2_secondary_conn["test"]["mcsharded"]
        self.assertEqual(db_secondary1.count(), 2)

        # Wait for replication on the doc manager
        # Note that both OplogThreads share the same doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 3
        assert_soon(c, "not all writes were replicated to doc manager",
                    max_tries=120)

        # Kill the new primary
        kill_mongo_proc(self.shard1_scnd_p, destroy=False)

        # Start both servers back up
        restart_mongo_proc(self.shard1_prim_p)
        primary_admin = self.shard1_conn["admin"]
        c = lambda: primary_admin.command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        restart_mongo_proc(self.shard1_scnd_p)
        secondary_admin = self.shard1_secondary_conn["admin"]
        # myState == 2 is the value this test waits for on the restarted
        # secondary
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        query = {"i": {"$lt": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)

        # Only first document should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)

        # Same should hold for the doc manager
        docman_docs = [d for d in self.opman1.doc_managers[0]._search()
                       if d["i"] < 1000]
        self.assertEqual(len(docman_docs), 1)
        self.assertEqual(docman_docs[0]["i"], 0)

        # Wait for previous rollback to complete
        def rollback_done():
            """Return True once both secondaries hold one document each."""
            secondary1_count = retry_until_ok(db_secondary1.count)
            secondary2_count = retry_until_ok(db_secondary2.count)
            return (1, 1) == (secondary1_count, secondary2_count)
        assert_soon(rollback_done,
                    "rollback never replicated to one or more secondaries")

        ##############################

        # Case 2: Primaries on both shards go down
        kill_mongo_proc(self.shard1_prim_p, destroy=False)
        kill_mongo_proc(self.shard2_prim_p, destroy=False)

        # Wait for the secondaries to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        shard2_secondary_admin = self.shard2_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])
        assert_soon(
            lambda: shard2_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document on each shard. These will be rolled back later
        retry_until_ok(db_main.insert, {"i": 1})
        self.assertEqual(db_secondary1.count(), 2)
        retry_until_ok(db_main.insert, {"i": 1001})
        self.assertEqual(db_secondary2.count(), 2)

        # Wait for replication on the doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 4
        assert_soon(c, "not all writes were replicated to doc manager")

        # Kill the new primaries
        kill_mongo_proc(self.shard1_scnd_p, destroy=False)
        kill_mongo_proc(self.shard2_scnd_p, destroy=False)

        # Start the servers back up...
        # Shard 1
        restart_mongo_proc(self.shard1_prim_p)
        c = lambda: self.shard1_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        restart_mongo_proc(self.shard1_scnd_p)
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        # Shard 2
        restart_mongo_proc(self.shard2_prim_p)
        c = lambda: self.shard2_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        restart_mongo_proc(self.shard2_scnd_p)
        secondary_admin = self.shard2_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)

        # Wait for the shards to come online
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)
        query2 = {"i": {"$gte": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query2).count) > 0)

        # Only first documents should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)
        self.assertEqual(db_main.find(query2).count(), 1)
        self.assertEqual(db_main.find_one(query2)["i"], 1000)

        # Same should hold for the doc manager
        i_values = [d["i"] for d in self.opman1.doc_managers[0]._search()]
        self.assertEqual(len(i_values), 2)
        self.assertIn(0, i_values)
        self.assertIn(1000, i_values)

    def test_with_chunk_migration(self):
        """Test that DocManagers have proper state after both a successful
        and an unsuccessful chunk migration
        """

        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        for i in range(1000):
            collection.insert({"i": i + 500})
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Test successful chunk move from shard 1 to shard 2
        self.mongos_conn["admin"].command(
            "moveChunk",
            "test.mcsharded",
            find={"i": 1},
            to="demo-set-1"
        )

        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

        # Mark the collection as "dropped". This will cause migration to fail.
        self.mongos_conn["config"]["collections"].update(
            {"_id": "test.mcsharded"},
            {"$set": {"dropped": True}}
        )

        # Test unsuccessful chunk move from shard 2 to shard 1
        def fail_to_move_chunk():
            """Issue the moveChunk that is expected to raise."""
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to="demo-set-0"
            )
        self.assertRaises(pymongo.errors.OperationFailure, fail_to_move_chunk)
        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

    def test_with_orphan_documents(self):
        """Test that DocManagers have proper state after a chunk migration
        that results in orphaned documents.
        """
        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        # All 1000 documents are handed to a single insert() call
        collection.insert({"i": i + 500} for i in range(1000))
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Stop replication using the 'rsSyncApplyStop' failpoint
        self.shard1_conn.admin.command(
            "configureFailPoint", "rsSyncApplyStop",
            mode="alwaysOn"
        )

        # Move a chunk from shard2 to shard1
        def move_chunk():
            """Run moveChunk, ignoring the failure caused by the kill below."""
            try:
                self.mongos_conn["admin"].command(
                    "moveChunk",
                    "test.mcsharded",
                    find={"i": 1000},
                    to="demo-set-0"
                )
            except pymongo.errors.OperationFailure:
                pass

        # moveChunk will never complete, so use another thread to continue
        mover = threading.Thread(target=move_chunk)
        mover.start()

        # wait for documents to start moving to shard 1
        assert_soon(lambda: self.shard1_conn.test.mcsharded.count() > 500)

        # Get opid for moveChunk command
        # (if several operations match, the last one found is used)
        operations = self.mongos_conn.test.current_op()
        opid = None
        for op in operations["inprog"]:
            if op.get("query", {}).get("moveChunk"):
                opid = op["opid"]
        self.assertNotEqual(opid, None, "could not find moveChunk operation")
        # Kill moveChunk with the opid
        self.mongos_conn["test"]["$cmd.sys.killop"].find_one({"op": opid})

        # Mongo Connector should not become confused by unsuccessful chunk move
        docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(docs), 1000)
        self.assertEqual(sorted(d["i"] for d in docs),
                         list(range(500, 1500)))

        # cleanup
        mover.join()
class TestRollbacks(unittest.TestCase):
    """Rollback tests for a single OplogThread on the 'rollbacks' replica set.

    Every test follows the same outline: write while the original primary is
    up, kill it so the secondary takes over, write (or delete) on the new
    primary, then kill the new primary and restart both members so the
    writes made on the former secondary are rolled back.  The doc
    manager(s) must end up matching what MongoDB holds after the rollback.
    """

    def tearDown(self):
        """Call ``kill_all()`` after every test."""
        kill_all()

    def setUp(self):
        """Reset the progress file, start the replica set, build the thread."""
        # Start from an empty oplog progress file.
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        with open("config.txt", "w"):
            pass

        # Start a replica set
        _, self.secondary_p, self.primary_p = start_replica_set('rollbacks')

        # Connection to the replica set as a whole
        self.main_conn = MongoClient(
            '%s:%d' % (mongo_host, self.primary_p), replicaSet='rollbacks')
        # Connection to the primary specifically
        self.primary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.primary_p))
        # Connection to the secondary specifically
        self.secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.secondary_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread replicating "test.mc" into one in-memory doc manager
        self.opman = OplogThread(
            primary_conn=self.main_conn,
            main_address='%s:%d' % (mongo_host, self.primary_p),
            oplog_coll=self.main_conn["local"]["oplog.rs"],
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=["test.mc"],
            auth_key=None,
            auth_username=None,
            repl_set="rollbacks")

    def _kill_new_primary_and_restart(self):
        """Kill the promoted secondary, then bring both members back up.

        Waits until the original primary reports itself as master again and
        until the restarted secondary reports replica-set state 2.
        """
        # Kill the new primary
        kill_mongo_proc(self.secondary_p, destroy=False)

        # Start both servers back up
        restart_mongo_proc(self.primary_p)
        admin_db = self.primary_conn["admin"]
        assert_soon(
            lambda: admin_db.command("isMaster")["ismaster"],
            "restarted primary never resumed primary status")
        restart_mongo_proc(self.secondary_p)
        assert_soon(
            lambda: retry_until_ok(
                self.secondary_conn.admin.command,
                'replSetGetStatus')['myState'] == 2,
            "restarted secondary never resumed secondary status")

    def test_single_target(self):
        """Test with a single replication target"""
        self.opman.start()
        secondary_conn = self.secondary_conn

        # Insert first document with primary up
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 1)

        # Make sure the insert is replicated
        assert_soon(lambda: secondary_conn["test"]["mc"].count() == 1,
                    "first write didn't replicate to secondary")

        # Kill the primary
        kill_mongo_proc(self.primary_p, destroy=False)

        # Wait for the secondary to be promoted
        assert_soon(
            lambda: secondary_conn["admin"].command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        retry_until_ok(self.main_conn["test"]["mc"].insert, {"i": 1})
        self.assertEqual(secondary_conn["test"]["mc"].count(), 2)

        # Wait for replication to doc manager
        assert_soon(lambda: len(self.opman.doc_managers[0]._search()) == 2,
                    "not all writes were replicated to doc manager")

        self._kill_new_primary_and_restart()
        assert_soon(
            lambda: retry_until_ok(self.main_conn.test.mc.find().count) > 0,
            "documents not found after primary/secondary restarted")

        # Only first document should exist in MongoDB
        self.assertEqual(self.main_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.main_conn["test"]["mc"].find_one()["i"], 0)

        # Same case should hold for the doc manager
        target = self.opman.doc_managers[0]
        self.assertEqual(len(target._search()), 1)
        self.assertEqual(target._search()[0]["i"], 0)

        # cleanup
        self.opman.join()

    def test_many_targets(self):
        """Test with several replication targets"""
        # OplogThread has multiple doc managers
        self.opman.doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.start()
        secondary_conn = self.secondary_conn

        # Insert a document into each namespace
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)

        # Make sure the insert is replicated
        assert_soon(lambda: secondary_conn["test"]["mc"].count() == 1,
                    "first write didn't replicate to secondary")

        # Kill the primary
        kill_mongo_proc(self.primary_p, destroy=False)

        # Wait for the secondary to be promoted
        assert_soon(
            lambda: secondary_conn.admin.command("isMaster")['ismaster'],
            'secondary was never promoted')

        # Insert more documents. This will be rolled back later
        # Some of these documents will be manually removed from
        # certain doc managers, to emulate the effect of certain
        # target systems being ahead/behind others
        secondary_ids = [
            retry_until_ok(self.main_conn["test"]["mc"].insert, {"i": i})
            for i in range(1, 10)
        ]
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 10)

        # Wait for replication to the doc managers
        def docmans_done():
            return all(len(dm._search()) == 10
                       for dm in self.opman.doc_managers)

        assert_soon(docmans_done,
                    "not all writes were replicated to doc managers")

        # Remove some documents from the doc managers to simulate
        # uneven replication
        ts = self.opman.doc_managers[0].get_last_doc()['_ts']
        for doc_id in secondary_ids[8:]:
            self.opman.doc_managers[1].remove(
                {"_id": doc_id, "ns": "test.mc", "_ts": ts})
        for doc_id in secondary_ids[2:]:
            self.opman.doc_managers[2].remove(
                {"_id": doc_id, "ns": "test.mc", "_ts": ts})

        self._kill_new_primary_and_restart()
        assert_soon(
            lambda: retry_until_ok(
                self.primary_conn.test.mc.find().count) > 0,
            "documents not found after primary/secondary restarted")

        # Only first document should exist in MongoDB
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.primary_conn["test"]["mc"].find_one()["i"], 0)

        # Give OplogThread some time to catch up
        time.sleep(10)

        # Same case should hold for the doc managers
        for dm in self.opman.doc_managers:
            self.assertEqual(len(dm._search()), 1)
            self.assertEqual(dm._search()[0]["i"], 0)

        self.opman.join()

    def test_deletions(self):
        """Test rolling back 'd' operations"""
        self.opman.start()

        # Insert two documents, wait till they replicate to the secondary
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.main_conn["test"]["mc"].insert({"i": 1})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 2)
        assert_soon(lambda: self.secondary_conn["test"]["mc"].count() == 2,
                    "first write didn't replicate to secondary")

        # Kill the primary, wait for secondary to be promoted
        kill_mongo_proc(self.primary_p, destroy=False)
        assert_soon(
            lambda: self.secondary_conn["admin"].command(
                "isMaster")["ismaster"])

        # Delete first document
        retry_until_ok(self.main_conn["test"]["mc"].remove, {"i": 0})
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 1)

        # Wait for replication to doc manager
        assert_soon(lambda: len(self.opman.doc_managers[0]._search()) == 1,
                    "delete was not replicated to doc manager")

        self._kill_new_primary_and_restart()

        # Both documents should exist in mongo
        assert_soon(
            lambda: retry_until_ok(self.main_conn["test"]["mc"].count) == 2)

        # Both documents should exist in doc manager
        target = self.opman.doc_managers[0]
        docs = list(target._search())
        self.assertEqual(len(docs), 2,
                         "Expected two documents, but got %r" % docs)

        self.opman.join()
class TestOplogManagerSharded(unittest.TestCase):
    """Test cases for OplogThreads running against a sharded cluster.

    One OplogThread is created per shard; both threads share a single doc
    manager and a single oplog progress dictionary.
    """

    def setUp(self):
        """Initialize the cluster.

        Starts the cluster, opens connections to mongos and to each shard
        (primary and secondary), shards ``test.mcsharded`` on ``i`` with a
        split at 1000, waits for the chunks to land on their shards, and
        creates (but does not start) one OplogThread per shard.
        """
        self.cluster = ShardedCluster().start()
        shard_a, shard_b = self.cluster.shards[0], self.cluster.shards[1]

        # Connection to mongos
        self.mongos_conn = self.cluster.client()

        # Connections to the shards
        self.shard1_conn = shard_a.client()
        self.shard2_conn = shard_b.client()
        self.shard1_secondary_conn = shard_a.secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)
        self.shard2_secondary_conn = shard_b.secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command(
            "shardCollection", "test.mcsharded", key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        split_command = bson.SON(
            [("split", "test.mcsharded"), ("middle", {"i": 1000})])
        self.mongos_conn.admin.command(split_command)

        # disable the balancer
        self.mongos_conn.config.settings.update(
            {"_id": "balancer"}, {"$set": {"stopped": True}}, upsert=True)

        # Move chunks to their proper places; a failing moveChunk is ignored
        for probe, destination in ((1, 'demo-set-0'), (1000, 'demo-set-1')):
            try:
                self.mongos_conn["admin"].command(
                    "moveChunk", "test.mcsharded",
                    find={"i": probe}, to=destination)
            except pymongo.errors.OperationFailure:
                pass

        # Make sure chunks are distributed correctly
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000

        # The message is formatted once, before polling starts.
        failure_message = 'chunks not moved? doc1=%r, doc2=%r' % (
            self.shard1_conn.test.mcsharded.find_one(),
            self.shard2_conn.test.mcsharded.find_one())
        assert_soon(chunks_moved, max_tries=120, message=failure_message)
        self.mongos_conn.test.mcsharded.remove()

        # create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        with open("oplog.timestamp", "w"):
            pass

        # Oplog threads (oplog manager) for each shard
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(doc_manager,),
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(doc_manager,),
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)

    def tearDown(self):
        """Join both oplog threads, close every client, stop the cluster."""
        for opman in (self.opman1, self.opman2):
            try:
                opman.join()
            except RuntimeError:
                pass  # thread may not have been started
        for client in (self.mongos_conn,
                       self.shard1_conn,
                       self.shard2_conn,
                       self.shard1_secondary_conn,
                       self.shard2_secondary_conn):
            client.close()
        self.cluster.stop()

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""
        # timestamp = None: cursor matches the namespace-filtered oplog
        for opman, shard_conn in ((self.opman1, self.shard1_conn),
                                  (self.opman2, self.shard2_conn)):
            from_opman = opman.get_oplog_cursor(None)
            from_oplog = shard_conn["local"]["oplog.rs"].find(
                {'ns': {'$in': opman.namespace_set}})
            self.assertEqual(list(from_opman), list(from_oplog))

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert(doc)
        latest_timestamp = self.opman1.get_last_oplog_timestamp()
        cursor = self.opman1.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = cursor[0]['o']['_id']
        retrieved = self.mongos_conn.test.mcsharded.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        for i in range(2, 2002):
            self.mongos_conn["test"]["mcsharded"].insert({"i": i})
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)])
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)])

        # oplogs should have records for inserts performed, plus
        # various other messages
        oplog1_count = oplog1.count()
        oplog2_count = oplog2.count()
        self.assertGreaterEqual(oplog1_count, 998)
        self.assertGreaterEqual(oplog2_count, 1002)
        pivot1 = oplog1.skip(400).limit(1)[0]
        pivot2 = oplog2.skip(400).limit(1)[0]

        cursor1 = self.opman1.get_oplog_cursor(pivot1["ts"])
        cursor2 = self.opman2.get_oplog_cursor(pivot2["ts"])
        self.assertEqual(cursor1.count(), oplog1_count - 400)
        self.assertEqual(cursor2.count(), oplog2_count - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""
        # "empty" the oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(), None)
        self.assertEqual(self.opman2.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({"i": i + 500})

        # Newest entry of each shard's oplog, in natural order
        newest1 = self.shard1_conn["local"]["oplog.rs"].find().sort(
            "$natural", pymongo.DESCENDING).limit(1)[0]
        newest2 = self.shard2_conn["local"]["oplog.rs"].find().sort(
            "$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(),
                         newest1["ts"])
        self.assertEqual(self.opman2.get_last_oplog_timestamp(),
                         newest2["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """
        # Test with empty oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        dumped_ts1 = self.opman1.dump_collection()
        dumped_ts2 = self.opman2.dump_collection()
        self.assertEqual(dumped_ts1, None)
        self.assertEqual(dumped_ts2, None)

        # Test with non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({"i": i + 500})
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(last_ts1, self.opman1.dump_collection())
        self.assertEqual(last_ts2, self.opman2.dump_collection())
        self.assertEqual(len(self.opman1.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """
        # N.B. these sub-cases build off of each other and cannot be
        # re-ordered without side-effects
        opmans = (self.opman1, self.opman2)

        def saved_progress(opman):
            """Return the timestamp stored for this thread's oplog."""
            with opman.oplog_progress as prog:
                return prog.get_dict()[str(opman.oplog)]

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        for opman in opmans:
            opman.collection_dump = False
        for opman in opmans:
            self.assertTrue(
                all(doc['op'] == 'n' for doc in opman.init_cursor()[0]))
            self.assertEqual(opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        for opman in opmans:
            opman.collection_dump = True
        for opman in opmans:
            cursor, cursor_len = opman.init_cursor()
            self.assertEqual(cursor, None)
            self.assertEqual(cursor_len, 0)
            self.assertEqual(opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        self.assertEqual(saved_progress(self.opman1), last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, no collection dump, stuff in oplog
        progress = LockingDict()
        for opman in opmans:
            opman.oplog_progress = progress
        for opman in opmans:
            opman.collection_dump = False
        collection.insert({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.opman1.init_cursor()
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        self.assertEqual(saved_progress(self.opman1), last_ts1)
        cursor, cursor_len = self.opman2.init_cursor()
        # Skip ahead so that the next entry is the cursor's last one
        for _ in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 1200)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        self.assertEqual(saved_progress(self.opman2), last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        for opman in opmans:
            opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry1 = list(
            self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        entry2 = list(
            self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        for opman in opmans:
            opman.oplog_progress = progress
        for opman in opmans:
            opman.checkpoint = None
        cursor1, cursor_len1 = self.opman1.init_cursor()
        cursor2, cursor_len2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        self.assertEqual(saved_progress(self.opman1), entry1[0]["ts"])
        self.assertEqual(saved_progress(self.opman2), entry2[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
        progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
        for opman in opmans:
            opman.oplog_progress = progress
        for opman in opmans:
            opman.checkpoint = None
        for opman in opmans:
            cursor, cursor_len = opman.init_cursor()
            self.assertEqual(cursor_len, 0)
            self.assertEqual(cursor, None)
            self.assertIsNotNone(opman.checkpoint)

    def test_rollback(self):
        """Test the rollback method in a sharded environment

        Cases:
        1. Documents on both shards, rollback on one shard
        2. Documents on both shards, rollback on both shards
        """
        self.opman1.start()
        self.opman2.start()
        shard_a, shard_b = self.cluster.shards[0], self.cluster.shards[1]

        # Insert first documents while primaries are up
        db_main = self.mongos_conn["test"]["mcsharded"]
        db_main.insert({"i": 0}, w=2)
        db_main.insert({"i": 1000}, w=2)
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].count(), 1)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].count(), 1)

        # Case 1: only one primary goes down, shard1 in this case
        shard_a.primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        retry_until_ok(db_main.insert, {"i": 1})
        db_secondary1 = self.shard1_secondary_conn["test"]["mcsharded"]
        db_secondary2 = self.shard2_secondary_conn["test"]["mcsharded"]
        self.assertEqual(db_secondary1.count(), 2)

        # Wait for replication on the doc manager
        # Note that both OplogThreads share the same doc manager
        def three_docs_replicated():
            return len(self.opman1.doc_managers[0]._search()) == 3

        assert_soon(three_docs_replicated,
                    "not all writes were replicated to doc manager",
                    max_tries=120)

        # Kill the new primary
        shard_a.secondary.stop(destroy=False)

        # Start both servers back up
        shard_a.primary.start()
        primary_admin = self.shard1_conn["admin"]

        def shard1_primary_is_master():
            return primary_admin.command("isMaster")["ismaster"]

        assert_soon(lambda: retry_until_ok(shard1_primary_is_master))
        shard_a.secondary.start()
        secondary_admin = self.shard1_secondary_conn["admin"]

        def shard1_secondary_recovered():
            return secondary_admin.command(
                "replSetGetStatus")["myState"] == 2

        assert_soon(shard1_secondary_recovered)
        query = {"i": {"$lt": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)

        # Only first document should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)

        def check_docman_rollback():
            docman_docs = [
                d for d in self.opman1.doc_managers[0]._search()
                if d["i"] < 1000
            ]
            return len(docman_docs) == 1 and docman_docs[0]["i"] == 0

        assert_soon(check_docman_rollback, "doc manager did not roll back")

        # Wait for previous rollback to complete
        def rollback_done():
            secondary1_count = retry_until_ok(db_secondary1.count)
            secondary2_count = retry_until_ok(db_secondary2.count)
            return (1, 1) == (secondary1_count, secondary2_count)

        assert_soon(rollback_done,
                    "rollback never replicated to one or more secondaries")

        ##############################

        # Case 2: Primaries on both shards go down
        shard_a.primary.stop(destroy=False)
        shard_b.primary.stop(destroy=False)

        # Wait for the secondaries to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        shard2_secondary_admin = self.shard2_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])
        assert_soon(
            lambda: shard2_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document on each shard. These will be rolled back
        # later
        retry_until_ok(db_main.insert, {"i": 1})
        self.assertEqual(db_secondary1.count(), 2)
        retry_until_ok(db_main.insert, {"i": 1001})
        self.assertEqual(db_secondary2.count(), 2)

        # Wait for replication on the doc manager
        def four_docs_replicated():
            return len(self.opman1.doc_managers[0]._search()) == 4

        assert_soon(four_docs_replicated,
                    "not all writes were replicated to doc manager")

        # Kill the new primaries
        shard_a.secondary.stop(destroy=False)
        shard_b.secondary.stop(destroy=False)

        # Start the servers back up...
        # Shard 1
        shard_a.primary.start()

        def shard1_is_master():
            return self.shard1_conn['admin'].command("isMaster")["ismaster"]

        assert_soon(lambda: retry_until_ok(shard1_is_master))
        shard_a.secondary.start()
        secondary_admin1 = self.shard1_secondary_conn["admin"]

        def shard1_secondary_back():
            return secondary_admin1.command(
                "replSetGetStatus")["myState"] == 2

        assert_soon(shard1_secondary_back)

        # Shard 2
        shard_b.primary.start()

        def shard2_is_master():
            return self.shard2_conn['admin'].command("isMaster")["ismaster"]

        assert_soon(lambda: retry_until_ok(shard2_is_master))
        shard_b.secondary.start()
        secondary_admin2 = self.shard2_secondary_conn["admin"]

        def shard2_secondary_back():
            return secondary_admin2.command(
                "replSetGetStatus")["myState"] == 2

        assert_soon(shard2_secondary_back)

        # Wait for the shards to come online
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)
        query2 = {"i": {"$gte": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query2).count) > 0)

        # Only first documents should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)
        self.assertEqual(db_main.find(query2).count(), 1)
        self.assertEqual(db_main.find_one(query2)["i"], 1000)

        # Same should hold for the doc manager
        i_values = [d["i"] for d in self.opman1.doc_managers[0]._search()]
        self.assertEqual(len(i_values), 2)
        self.assertIn(0, i_values)
        self.assertIn(1000, i_values)

    def test_with_chunk_migration(self):
        """Test that DocManagers have proper state after both a successful
        and an unsuccessful chunk migration
        """
        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        for i in range(1000):
            collection.insert({"i": i + 500})

        # Assert current state of the mongoverse
        self.assertEqual(
            self.shard1_conn["test"]["mcsharded"].find().count(), 500)
        self.assertEqual(
            self.shard2_conn["test"]["mcsharded"].find().count(), 500)
        assert_soon(
            lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        def assert_docman_has_all_docs():
            """The doc manager must hold exactly i = 500..1499."""
            all_docs = self.opman1.doc_managers[0]._search()
            self.assertEqual(len(all_docs), 1000)
            ordered = sorted(all_docs, key=lambda x: x["i"])
            for position, doc in enumerate(ordered):
                self.assertEqual(doc["i"], position + 500)

        # Test successful chunk move from shard 1 to shard 2
        self.mongos_conn["admin"].command(
            "moveChunk", "test.mcsharded", find={"i": 1}, to="demo-set-1")

        # doc manager should still have all docs
        assert_docman_has_all_docs()

        # Mark the collection as "dropped". This will cause migration to
        # fail.
        self.mongos_conn["config"]["collections"].update(
            {"_id": "test.mcsharded"}, {"$set": {"dropped": True}})

        # Test unsuccessful chunk move from shard 2 to shard 1
        def fail_to_move_chunk():
            self.mongos_conn["admin"].command(
                "moveChunk", "test.mcsharded",
                find={"i": 1}, to="demo-set-0")

        self.assertRaises(pymongo.errors.OperationFailure,
                          fail_to_move_chunk)

        # doc manager should still have all docs
        assert_docman_has_all_docs()

    def test_with_orphan_documents(self):
        """Test that DocManagers have proper state after a chunk migration
        that results in orphaned documents.
        """
        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": i + 500} for i in range(1000))

        # Assert current state of the mongoverse
        self.assertEqual(
            self.shard1_conn["test"]["mcsharded"].find().count(), 500)
        self.assertEqual(
            self.shard2_conn["test"]["mcsharded"].find().count(), 500)
        assert_soon(
            lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Stop replication using the 'rsSyncApplyStop' failpoint
        self.shard1_conn.admin.command(
            "configureFailPoint", "rsSyncApplyStop", mode="alwaysOn")

        # Move a chunk from shard2 to shard1
        def move_chunk():
            try:
                self.mongos_conn["admin"].command(
                    "moveChunk", "test.mcsharded",
                    find={"i": 1000}, to="demo-set-0")
            except pymongo.errors.OperationFailure:
                pass

        # moveChunk will never complete, so use another thread to continue
        mover = threading.Thread(target=move_chunk)
        mover.start()

        # wait for documents to start moving to shard 1
        assert_soon(lambda: self.shard1_conn.test.mcsharded.count() > 500)

        # Get opid for moveChunk command (the last matching op wins)
        operations = self.mongos_conn.test.current_op()
        opid = None
        for running_op in operations["inprog"]:
            if running_op.get("query", {}).get("moveChunk"):
                opid = running_op["opid"]
        self.assertNotEqual(opid, None, "could not find moveChunk operation")

        # Kill moveChunk with the opid
        self.mongos_conn["test"]["$cmd.sys.killop"].find_one({"op": opid})

        # Mongo Connector should not become confused by unsuccessful chunk
        # move
        docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(docs), 1000)
        self.assertEqual(sorted(d["i"] for d in docs),
                         list(range(500, 1500)))

        # cleanup
        mover.join()
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
    cluster
    """

    def setUp(self):
        """Start a single-node replica set and build an (unstarted)
        OplogThread that replicates ``test.*`` and ``gridfs.*`` (as GridFS).
        """
        self.repl_set = ReplicaSetSingle().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local["oplog.rs"]
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(
                namespace_options={"test.*": True, "gridfs.*": {"gridfs": True}}
            ),
        )

    def tearDown(self):
        """Join the OplogThread, drop the test database and stop the
        replica set.
        """
        try:
            self.opman.join()
        except RuntimeError:
            pass  # OplogThread may not have been started
        self.primary_conn.drop_database("test")
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""
        # timestamp is None - all oplog entries excluding no-ops are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(
            cursor.count(),
            self.primary_conn["local"]["oplog.rs"].find({"op": {"$ne": "n"}}).count(),
        )

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)["o"]["_id"]
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert_many(
            [{"i": i} for i in range(2, 1002)]
        )
        oplog_cursor = self.oplog_coll.find(
            {"op": {"$ne": "n"}, "ns": {"$not": re.compile(r"\.(system|\$cmd)")}},
            sort=[("ts", pymongo.ASCENDING)],
        )

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 1 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 1 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog, with gridfs collections
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog with gridfs collections
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        # Insert 10 gridfs files
        for i in range(10):
            fs = gridfs.GridFS(self.primary_conn["gridfs"], collection="test" + str(i))
            fs.put(b"hello world")
        # Insert 1000 documents
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": i + 500})
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        # try/finally: the extra replica set must be shut down even when an
        # assertion below fails, otherwise its mongod process is leaked.
        try:
            opman = OplogThread(
                primary_client=conn,
                doc_managers=(DocManager(),),
                oplog_progress_dict=LockingDict(),
                namespace_config=NamespaceConfig(namespace_set=["test.test"]),
            )
            # Insert a document into an included collection
            conn["test"]["test"].insert_one({"test": 1})
            # Cause the oplog to rollover on a non-included collection
            while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
                conn["test"]["ignored"].insert_many(
                    [{"test": "1" * 1024} for _ in range(1024)]
                )
            last_ts = opman.get_last_oplog_timestamp()
            self.assertEqual(last_ts, opman.dump_collection())
            self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        finally:
            conn.close()
            repl_set.stop()

    def test_skipped_oplog_entry_updates_checkpoint(self):
        """Test that the checkpoint advances for both replicated and skipped
        oplog entries.

        Uses its own replica set and an OplogThread restricted to
        ``test.test``; an insert into ``test.ignored`` must still move the
        checkpoint to the latest oplog timestamp.
        """
        repl_set = ReplicaSetSingle().start()
        conn = repl_set.client()
        # Nested try/finally: release the thread, the client and the replica
        # set even when an assertion fails part-way through the test.
        try:
            opman = OplogThread(
                primary_client=conn,
                doc_managers=(DocManager(),),
                oplog_progress_dict=LockingDict(),
                namespace_config=NamespaceConfig(namespace_set=["test.test"]),
            )
            opman.start()
            try:
                # Insert a document into an included collection
                conn["test"]["test"].insert_one({"test": 1})
                last_ts = opman.get_last_oplog_timestamp()
                assert_soon(
                    lambda: last_ts == opman.checkpoint,
                    "OplogThread never updated checkpoint to non-skipped " "entry.",
                )
                self.assertEqual(len(opman.doc_managers[0]._search()), 1)

                # Make sure that the oplog thread updates its checkpoint on
                # every oplog entry.
                conn["test"]["ignored"].insert_one({"test": 1})
                last_ts = opman.get_last_oplog_timestamp()
                assert_soon(
                    lambda: last_ts == opman.checkpoint,
                    "OplogThread never updated checkpoint to skipped entry.",
                )
            finally:
                opman.join()
        finally:
            conn.close()
            repl_set.stop()

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{"a": i} for i in range(100)]
        # Documents 50..59 are flagged so that they are expected to be
        # missing from the doc manager after the dump.
        for i in range(50, 60):
            docs[i]["_upsert_exception"] = True
        self.primary_conn["test"]["test"].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())

        docs = self.opman.doc_managers[0]._search()
        docs.sort(key=lambda doc: doc["a"])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc["a"], correct_a)

    def test_dump_collection_cancel(self):
        """Test that dump_collection returns None when cancelled."""
        self.primary_conn["test"]["test"].insert_one({"test": "1"})

        # Pretend that the OplogThread was cancelled
        self.opman.running = False
        self.assertIsNone(self.opman.dump_collection())

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc["op"] == "n" for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True

        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertTrue(cursor_empty)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        collection = self.primary_conn["test"]["test"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"i": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        # Start from None so that an unexpectedly empty cursor produces a
        # clear assertion failure instead of a NameError.
        last_doc = None
        for doc in cursor:
            last_doc = doc
        self.assertIsNotNone(last_doc)
        self.assertEqual(last_doc["o"]["i"], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists, no collection dump, something in oplog
        collection.insert_many([{"i": i + 500} for i in range(1000)])
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertEqual(cursor, None)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))

    def test_namespace_mapping(self):
        """Test mapping of namespaces

        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {
            "test.test1": "test.test1_dest",
            "test.test2": "test.test2_dest",
        }
        self.opman.namespace_config = NamespaceConfig(
            namespace_set=source_ns, namespace_options=dest_mapping
        )
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        def update_complete():
            # True once any replicated document carries the updated field.
            return any(d.get("weakness") == "kryptonite" for d in docman._search())

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {"weakness": "kryptonite"}}
            )

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert_one(
            {"name": "kermit", "color": "green"}
        )
        self.primary_conn["test"]["test"].insert_one(
            {"name": "elmo", "color": "firetruck red"}
        )

        # 2 documents replicated to each of the 3 targets
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets",
        )

        self.primary_conn["test"]["test"].delete_one({"name": "elmo"})

        # 1 document left in each of the 3 targets
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets",
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_upgrade_oplog_progress(self):
        """Test that updating the checkpoint replaces an old-format oplog
        progress dict (keyed by ``str(oplog)`` with a long timestamp) with
        one keyed by the replica set name holding the timestamp itself.
        """
        first_oplog_ts = self.opman.oplog.find_one()["ts"]
        # Old format oplog progress file:
        progress = {str(self.opman.oplog): bson_ts_to_long(first_oplog_ts)}
        # Set up oplog managers to use the old format.
        oplog_progress = LockingDict()
        oplog_progress.dict = progress
        self.opman.oplog_progress = oplog_progress
        # Cause the oplog managers to update their checkpoints.
        self.opman.update_checkpoint(first_oplog_ts)
        # New format should be in place now.
        new_format = {self.opman.replset_name: first_oplog_ts}
        self.assertEqual(new_format, self.opman.oplog_progress.get_dict())
class TestOplogManager(unittest.TestCase):
    """Test cases for OplogThread, plus the fixture that starts the replica
    set they run against.
    """

    def setUp(self):
        """Start a replica set and build an (unstarted) OplogThread on it."""
        self.repl_set = ReplicaSet().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
        )

    def tearDown(self):
        """Join the OplogThread, drop the test database, stop the cluster."""
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass
        self.primary_conn.drop_database("test")
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        """Check get_oplog_cursor with no timestamp, with the latest
        timestamp and with a timestamp in the middle of the oplog.
        """
        # Without a timestamp every oplog entry is returned.
        full_cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(
            full_cursor.count(),
            self.primary_conn["local"]["oplog.rs"].count(),
        )

        # The newest entry is the only one at/after the latest timestamp.
        inserted = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert_one(inserted)
        newest_ts = self.opman.get_last_oplog_timestamp()
        latest_cursor = self.opman.get_oplog_cursor(newest_ts)
        self.assertNotEqual(latest_cursor, None)
        self.assertEqual(latest_cursor.count(), 1)
        entry_id = next(latest_cursor)['o']['_id']
        self.assertEqual(
            self.primary_conn.test.test.find_one(entry_id), inserted
        )

        # Many entries on both sides of the chosen timestamp.
        bulk_docs = [{"i": i} for i in range(2, 1002)]
        self.primary_conn["test"]["test"].insert_many(bulk_docs)
        oplog_filter = {
            'op': {'$ne': 'n'},
            'ns': {'$not': re.compile(r'\.(system|\$cmd)')},
        }
        filtered = self.oplog_coll.find(
            oplog_filter, sort=[("ts", pymongo.ASCENDING)]
        )

        # initial insert + 1000 more inserts
        self.assertEqual(filtered.count(), 1 + 1000)
        pivot_entry = filtered.skip(400).limit(-1)[0]

        after_pivot = self.opman.get_oplog_cursor(pivot_entry["ts"])
        self.assertEqual(after_pivot.count(), 1 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Check get_last_oplog_timestamp on an empty and a filled oplog."""
        # Point the thread at an empty collection to simulate an empty oplog.
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Real oplog with entries in it.
        real_oplog = self.primary_conn["local"]["oplog.rs"]
        self.opman.oplog = real_oplog
        for offset in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": offset + 500})
        newest = self.primary_conn["local"]["oplog.rs"].find()
        newest = newest.sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), newest["ts"])

    def test_dump_collection(self):
        """Check dump_collection.

        Cases:

        1. empty oplog
        2. non-empty oplog
        """
        # Case 1: empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.dump_collection(), None)

        # Case 2: non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for offset in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": offset + 500})
        expected_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(expected_ts, self.opman.dump_collection())
        dumped = self.opman.doc_managers[0]._search()
        self.assertEqual(len(dumped), 1000)

    def test_dump_collection_with_error(self):
        """Check dump_collection when some documents are invalid.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        # 100 documents, of which 50..59 carry the '_upsert_exception' flag.
        source_docs = [{'a': n} for n in range(100)]
        for flagged in source_docs[50:60]:
            flagged['_upsert_exception'] = True
        self.primary_conn['test']['test'].insert_many(source_docs)

        expected_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(expected_ts, self.opman.dump_collection())

        replicated = self.opman.doc_managers[0]._search()
        replicated.sort(key=lambda entry: entry['a'])

        self.assertEqual(len(replicated), 90)
        surviving = itertools.chain(range(0, 50), range(60, 100))
        for entry, wanted in zip(replicated, surviving):
            self.assertEqual(entry['a'], wanted)

    def test_init_cursor(self):
        """Check init_cursor.

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """
        # N.B. the sub-cases below build on each other; do not re-order them.

        # No last checkpoint, no collection dump, nothing in oplog.
        # Swap in an empty collection as the "oplog".
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        first_cursor = self.opman.init_cursor()[0]
        self.assertTrue(all(entry['op'] == 'n' for entry in first_cursor))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True

        cursor, count = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(count, 0)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        test_coll = self.primary_conn["test"]["test"]
        test_coll.insert_one({"i": 1})
        test_coll.delete_one({"i": 1})
        time.sleep(3)
        newest_ts = self.opman.get_last_oplog_timestamp()
        cursor, count = self.opman.init_cursor()
        self.assertEqual(count, 0)
        self.assertEqual(self.opman.checkpoint, newest_ts)
        with self.opman.oplog_progress as prog:
            stored = prog.get_dict()[str(self.opman.oplog)]
            self.assertEqual(stored, newest_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        test_coll.insert_one({"i": 2})
        newest_ts = self.opman.get_last_oplog_timestamp()
        cursor, count = self.opman.init_cursor()
        # Skip everything but the final entry.
        for _ in range(count - 1):
            next(cursor)
        self.assertEqual(next(cursor)['o']['i'], 2)
        self.assertEqual(self.opman.checkpoint, newest_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for offset in range(1000):
            test_coll.insert_one({"i": offset + 500})
        oplog_find = self.primary_conn["local"]["oplog.rs"].find
        entry = list(oplog_find(skip=200, limit=-2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, count = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            stored = prog.get_dict()[str(self.opman.oplog)]
            self.assertEqual(stored, entry[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, count = self.opman.init_cursor()
        self.assertEqual(count, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman.checkpoint)

    def test_namespace_mapping(self):
        """Check mapping of namespaces.

        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """
        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {
            "test.test1": "test.test1_dest",
            "test.test2": "test.test2_dest",
        }
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}
        weakness_update = {"$set": {"weakness": "kryptonite"}}

        def update_complete():
            # True once any replicated document shows the updated field.
            return any(
                d.get("weakness") == "kryptonite" for d in docman._search()
            )

        # Documents inside the namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)
            source_coll = self.primary_conn[db][coll]

            # insert
            source_coll.insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            unmapped = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(unmapped), 0)

            # update
            source_coll.update_one({"_id": 1}, weakness_update)

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            unmapped = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(unmapped), 0)

            # delete
            source_coll.delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            leftover = [
                d for d in docman._search() if d["ns"] == dest_mapping[ns]
            ]
            self.assertEqual(len(leftover), 0)

            # cleanup
            source_coll.delete_many({})
            self.opman.doc_managers[0]._delete()

        # Documents outside the namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)
            phony_coll = self.primary_conn[db][coll]

            # insert
            phony_coll.insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # update
            phony_coll.update_one({"_id": 1}, weakness_update)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Check that a single OplogThread replicates to several targets."""
        targets = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = targets

        def replicated_total():
            # Number of documents summed over every target.
            return sum(len(target._search()) for target in targets)

        # start replicating
        self.opman.start()
        test_coll = self.primary_conn["test"]["test"]
        test_coll.insert_one({"name": "kermit", "color": "green"})
        test_coll.insert_one({"name": "elmo", "color": "firetruck red"})

        assert_soon(
            lambda: replicated_total() == 6,
            "OplogThread should be able to replicate to multiple targets")

        test_coll.delete_one({"name": "elmo"})

        assert_soon(
            lambda: replicated_total() == 3,
            "OplogThread should be able to replicate to multiple targets")
        for target in targets:
            self.assertEqual(target._search()[0]["name"], "kermit")
class TestRollbacks(unittest.TestCase):
    """Rollback tests: writes accepted by a temporary primary and later
    rolled back must also disappear from the doc managers.
    """

    def setUp(self):
        """Reset the progress file, start the replica set, open the
        connections and build an (unstarted) OplogThread on ``test.mc``.
        """
        # Create a new, empty oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # Start a replica set
        start_cluster(sharded=False, use_mongos=False)
        primary_address = "localhost:%s" % PORTS_ONE["PRIMARY"]
        secondary_address = "localhost:%s" % PORTS_ONE["SECONDARY"]
        # Connection to the replica set as a whole
        self.main_conn = Connection(primary_address, replicaSet="demo-repl")
        # Connection to the primary specifically
        self.primary_conn = Connection(primary_address)
        # Connection to the secondary specifically
        self.secondary_conn = Connection(
            secondary_address,
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        self.opman = OplogThread(
            primary_conn=self.main_conn,
            main_address=primary_address,
            oplog_coll=self.main_conn["local"]["oplog.rs"],
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=["test.mc"],
            auth_key=None,
            auth_username=None,
            repl_set="demo-repl")

    def tearDown(self):
        """Shut everything down via ``kill_all``."""
        kill_all()

    def test_single_target(self):
        """Test with a single replication target"""
        self.opman.start()
        test_coll = self.main_conn["test"]["mc"]
        secondary = self.secondary_conn

        # Insert first document with primary up
        test_coll.insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].find().count(), 1)

        # Make sure the insert is replicated
        first_replicated = wait_for(
            lambda: secondary["test"]["mc"].count() == 1)
        self.assertTrue(first_replicated,
                        "first write didn't replicate to secondary")

        # Kill the primary
        kill_mongo_proc("localhost", PORTS_ONE["PRIMARY"])

        # Wait for the secondary to be promoted
        while not secondary["admin"].command("isMaster")["ismaster"]:
            time.sleep(1)

        # Insert another document. This will be rolled back later
        retry_until_ok(self.main_conn["test"]["mc"].insert, {"i": 1})
        self.assertEqual(secondary["test"]["mc"].count(), 2)

        # Wait for replication to doc manager
        def both_docs_replicated():
            return len(self.opman.doc_managers[0]._search()) == 2

        self.assertTrue(wait_for(both_docs_replicated),
                        "not all writes were replicated to doc manager")

        # Kill the new primary
        kill_mongo_proc("localhost", PORTS_ONE["SECONDARY"])

        # Start both servers back up
        start_mongo_proc(port=PORTS_ONE['PRIMARY'],
                         repl_set_name="demo-repl",
                         data="/replset1a",
                         log="/replset1a.log",
                         key_file=None)
        admin_db = self.primary_conn["admin"]
        while not admin_db.command("isMaster")["ismaster"]:
            time.sleep(1)
        start_mongo_proc(port=PORTS_ONE['SECONDARY'],
                         repl_set_name="demo-repl",
                         data="/replset1b",
                         log="/replset1b.log",
                         key_file=None)
        # myState 2 is what the loop waits for on the restarted secondary
        while secondary["admin"].command("replSetGetStatus")["myState"] != 2:
            time.sleep(1)
        while retry_until_ok(self.main_conn["test"]["mc"].find().count) == 0:
            time.sleep(1)

        # Only first document should exist in MongoDB
        self.assertEqual(self.main_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.main_conn["test"]["mc"].find_one()["i"], 0)

        # Same case should hold for the doc manager
        target = self.opman.doc_managers[0]
        self.assertEqual(len(target._search()), 1)
        self.assertEqual(target._search()[0]["i"], 0)

        # cleanup
        self.opman.join()

    def test_many_targets(self):
        """Test with several replication targets"""
        # OplogThread has multiple doc managers
        targets = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = targets
        secondary = self.secondary_conn

        self.opman.start()

        # Insert a document into each namespace
        self.main_conn["test"]["mc"].insert({"i": 0})
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)

        # Make sure the insert is replicated
        first_replicated = wait_for(
            lambda: secondary["test"]["mc"].count() == 1)
        self.assertTrue(first_replicated,
                        "first write didn't replicate to secondary")

        # Kill the primary
        kill_mongo_proc("localhost", PORTS_ONE["PRIMARY"])

        # Wait for the secondary to be promoted
        while not secondary["admin"].command("isMaster")["ismaster"]:
            time.sleep(1)

        # Insert more documents. This will be rolled back later
        # Some of these documents will be manually removed from
        # certain doc managers, to emulate the effect of certain
        # target systems being ahead/behind others
        secondary_ids = [
            retry_until_ok(self.main_conn["test"]["mc"].insert, {"i": i})
            for i in range(1, 10)
        ]
        self.assertEqual(self.secondary_conn["test"]["mc"].count(), 10)

        # Wait for replication to the doc managers
        def docmans_done():
            return all(
                len(dm._search()) == 10 for dm in self.opman.doc_managers
            )

        self.assertTrue(wait_for(docmans_done),
                        "not all writes were replicated to doc managers")

        # Remove some documents from the doc managers to simulate
        # uneven replication
        for doc_id in secondary_ids[8:]:
            self.opman.doc_managers[1].remove({"_id": doc_id})
        for doc_id in secondary_ids[2:]:
            self.opman.doc_managers[2].remove({"_id": doc_id})

        # Kill the new primary
        kill_mongo_proc("localhost", PORTS_ONE["SECONDARY"])

        # Start both servers back up
        start_mongo_proc(port=PORTS_ONE['PRIMARY'],
                         repl_set_name="demo-repl",
                         data="/replset1a",
                         log="/replset1a.log",
                         key_file=None)
        admin_db = self.primary_conn["admin"]
        while not admin_db.command("isMaster")["ismaster"]:
            time.sleep(1)
        start_mongo_proc(port=PORTS_ONE['SECONDARY'],
                         repl_set_name="demo-repl",
                         data="/replset1b",
                         log="/replset1b.log",
                         key_file=None)
        # myState 2 is what the loop waits for on the restarted secondary
        while secondary["admin"].command("replSetGetStatus")["myState"] != 2:
            time.sleep(1)
        while retry_until_ok(
                self.primary_conn["test"]["mc"].find().count) == 0:
            time.sleep(1)

        # Only first document should exist in MongoDB
        self.assertEqual(self.primary_conn["test"]["mc"].count(), 1)
        self.assertEqual(self.primary_conn["test"]["mc"].find_one()["i"], 0)

        # Same case should hold for the doc managers
        for dm in self.opman.doc_managers:
            self.assertEqual(len(dm._search()), 1)
            self.assertEqual(dm._search()[0]["i"], 0)

        self.opman.join()