def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog, with gridfs collections
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog with gridfs collections
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        # Insert 10 gridfs files
        for i in range(10):
            fs = gridfs.GridFS(self.primary_conn["gridfs"],
                               collection="test" + str(i))
            fs.put(b"hello world")
        # Insert 1000 documents
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(namespace_set=["test.test"]),
        )
        # Insert a document into an included collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-included collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many(
                [{"test": "1" * 1024} for _ in range(1024)])
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()
    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        dest_mapping_stru = DestMapping(["test.test"], [], {})
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=dest_mapping_stru,
            ns_set=set(["test.test"])
        )
        # Insert a document into a ns_set collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-ns_set collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many(
                [{"test": "1" * 1024} for _ in range(1024)])
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()
class TestOplogManagerSharded(unittest.TestCase):
    """Defines all test cases for OplogThreads running on a sharded
    cluster
    """

    def setUp(self):
        """ Initialize the cluster:

        Clean out the databases used by the tests
        Make connections to mongos, mongods
        Create and shard test collections
        Create OplogThreads
        """
        # Start the cluster with a mongos on port 27217
        self.mongos_p = start_cluster()

        # Connection to mongos
        mongos_address = '%s:%d' % (mongo_host, self.mongos_p)
        self.mongos_conn = MongoClient(mongos_address)

        # Connections to the shards
        shard1_ports = get_shard(self.mongos_p, 0)
        shard2_ports = get_shard(self.mongos_p, 1)
        self.shard1_prim_p = shard1_ports['primary']
        self.shard1_scnd_p = shard1_ports['secondaries'][0]
        self.shard2_prim_p = shard2_ports['primary']
        self.shard2_scnd_p = shard2_ports['secondaries'][0]
        self.shard1_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard1_prim_p),
                                       replicaSet="demo-set-0")
        self.shard2_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard2_prim_p),
                                       replicaSet="demo-set-1")
        self.shard1_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard1_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )
        self.shard2_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard2_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        self.mongos_conn.admin.command(bson.SON([
            ("split", "test.mcsharded"),
            ("middle", {"i": 1000})
        ]))

        # disable the balancer
        self.mongos_conn.config.settings.update(
            {"_id": "balancer"},
            {"$set": {"stopped": True}},
            upsert=True
        )

        # Move chunks to their proper places
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to="demo-set-0"
            )
        except pymongo.errors.OperationFailure:
            pass        # chunk may already be on the correct shard
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1000},
                to="demo-set-1"
            )
        except pymongo.errors.OperationFailure:
            pass        # chunk may already be on the correct shard

        # Make sure chunks are distributed correctly
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000
        assert_soon(chunks_moved)
        self.mongos_conn.test.mcsharded.remove()

        # create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # Oplog threads (oplog manager) for each shard
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_conn=self.shard1_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard1_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )
        self.opman2 = OplogThread(
            primary_conn=self.shard2_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard2_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )

    def tearDown(self):
        try:
            self.opman1.join()
        except RuntimeError:
            pass                # thread may not have been started
        try:
            self.opman2.join()
        except RuntimeError:
            pass                # thread may not have been started
        self.mongos_conn.close()
        self.shard1_conn.close()
        self.shard2_conn.close()
        self.shard1_secondary_conn.close()
        self.shard2_secondary_conn.close()
        kill_all()

    def test_retrieve_doc(self):
        """ Test the retrieve_doc method """

        # Trivial case where the oplog entry is None
        self.assertEqual(self.opman1.retrieve_doc(None), None)

        # Retrieve a document from insert operation in oplog
        doc = {"name": "mango", "type": "fruit",
               "ns": "test.mcsharded", "weight": 3.24, "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert(doc)
        oplog_entries = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        oplog_entry = next(oplog_entries)
        self.assertEqual(self.opman1.retrieve_doc(oplog_entry), doc)

        # Retrieve a document from update operation in oplog
        self.mongos_conn["test"]["mcsharded"].update(
            {"i": 1},
            {"$set": {"sounds-like": "mongo"}}
        )
        oplog_entries = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        doc["sounds-like"] = "mongo"
        self.assertEqual(self.opman1.retrieve_doc(next(oplog_entries)), doc)

        # Retrieve a document from remove operation in oplog
        # (expected: None)
        self.mongos_conn["test"]["mcsharded"].remove({
            "i": 1
        })
        oplog_entries = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        self.assertEqual(self.opman1.retrieve_doc(next(oplog_entries)), None)

        # Retrieve a document with bad _id
        # (expected: None)
        oplog_entry["o"]["_id"] = "ThisIsNotAnId123456789"
        self.assertEqual(self.opman1.retrieve_doc(oplog_entry), None)

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # Trivial case: timestamp is None
        self.assertEqual(self.opman1.get_oplog_cursor(None), None)

        # earliest entry is after given timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert(doc)
        self.assertEqual(self.opman1.get_oplog_cursor(
            bson.Timestamp(1, 0)), None)

        # earliest entry is the only one at/after timestamp
        latest_timestamp = self.opman1.get_last_oplog_timestamp()
        cursor = self.opman1.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        self.assertEqual(self.opman1.retrieve_doc(cursor[0]), doc)

        # many entries before and after timestamp
        for i in range(2, 2002):
            self.mongos_conn["test"]["mcsharded"].insert({
                "i": i
            })
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )

        # oplogs should have records for inserts performed, plus
        # various other messages
        oplog1_count = oplog1.count()
        oplog2_count = oplog2.count()
        self.assertGreaterEqual(oplog1_count, 998)
        self.assertGreaterEqual(oplog2_count, 1002)
        pivot1 = oplog1.skip(400).limit(1)[0]
        pivot2 = oplog2.skip(400).limit(1)[0]

        cursor1 = self.opman1.get_oplog_cursor(pivot1["ts"])
        cursor2 = self.opman2.get_oplog_cursor(pivot2["ts"])
        self.assertEqual(cursor1.count(), oplog1_count - 400)
        self.assertEqual(cursor2.count(), oplog2_count - 400)

        # get_oplog_cursor fast-forwards *one doc beyond* the given timestamp
        doc1 = self.mongos_conn["test"]["mcsharded"].find_one(
            {"_id": next(cursor1)["o"]["_id"]})
        doc2 = self.mongos_conn["test"]["mcsharded"].find_one(
            {"_id": next(cursor2)["o"]["_id"]})
        self.assertEqual(doc1["i"], self.opman1.retrieve_doc(pivot1)["i"] + 1)
        self.assertEqual(doc2["i"], self.opman2.retrieve_doc(pivot2)["i"] + 1)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(), None)
        self.assertEqual(self.opman2.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({
                "i": i + 500
            })
        oplog1 = self.shard1_conn["local"]["oplog.rs"]
        oplog1 = oplog1.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        oplog2 = self.shard2_conn["local"]["oplog.rs"]
        oplog2 = oplog2.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(),
                         oplog1["ts"])
        self.assertEqual(self.opman2.get_last_oplog_timestamp(),
                         oplog2["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        last_ts1 = self.opman1.dump_collection()
        last_ts2 = self.opman2.dump_collection()
        self.assertEqual(last_ts1, None)
        self.assertEqual(last_ts2, None)

        # Test with non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({
                "i": i + 500
            })
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(last_ts1, self.opman1.dump_collection())
        self.assertEqual(last_ts2, self.opman2.dump_collection())
        self.assertEqual(len(self.opman1.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. last checkpoint exists
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertEqual(self.opman1.init_cursor(), None)
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertEqual(self.opman2.init_cursor(), None)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = True
        self.opman2.collection_dump = True
        self.assertEqual(self.opman1.init_cursor(), None)
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertEqual(self.opman2.init_cursor(), None)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor = self.opman2.init_cursor()
        self.assertEqual(next(cursor)["ts"], oplog_startup_ts)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, non-empty collections, stuff in oplog
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        collection.insert({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        self.assertEqual(next(self.opman2.init_cursor())["ts"], last_ts2)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)], last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry1 = list(
            self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        entry2 = list(
            self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1 = self.opman1.init_cursor()
        cursor2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])

    def test_rollback(self):
        """Test the rollback method in a sharded environment

        Cases:
        1. Documents on both shards, rollback on one shard
        2. Documents on both shards, rollback on both shards

        """

        self.opman1.start()
        self.opman2.start()

        # Insert first documents while primaries are up
        db_main = self.mongos_conn["test"]["mcsharded"]
        db_main.insert({"i": 0}, w=2)
        db_main.insert({"i": 1000}, w=2)
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].count(), 1)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].count(), 1)

        # Case 1: only one primary goes down, shard1 in this case
        kill_mongo_proc(self.shard1_prim_p, destroy=False)

        # Wait for the secondary to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        retry_until_ok(db_main.insert, {"i": 1})
        db_secondary1 = self.shard1_secondary_conn["test"]["mcsharded"]
        db_secondary2 = self.shard2_secondary_conn["test"]["mcsharded"]
        self.assertEqual(db_secondary1.count(), 2)

        # Wait for replication on the doc manager
        # Note that both OplogThreads share the same doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 3
        assert_soon(c, "not all writes were replicated to doc manager",
                    max_tries=120)

        # Kill the new primary
        kill_mongo_proc(self.shard1_scnd_p, destroy=False)

        # Start both servers back up
        restart_mongo_proc(self.shard1_prim_p)
        primary_admin = self.shard1_conn["admin"]
        c = lambda: primary_admin.command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        restart_mongo_proc(self.shard1_scnd_p)
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        query = {"i": {"$lt": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)

        # Only first document should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)

        # Same should hold for the doc manager
        docman_docs = [d for d in self.opman1.doc_managers[0]._search()
                       if d["i"] < 1000]
        self.assertEqual(len(docman_docs), 1)
        self.assertEqual(docman_docs[0]["i"], 0)

        # Wait for previous rollback to complete
        def rollback_done():
            secondary1_count = retry_until_ok(db_secondary1.count)
            secondary2_count = retry_until_ok(db_secondary2.count)
            return (1, 1) == (secondary1_count, secondary2_count)
        assert_soon(rollback_done,
                    "rollback never replicated to one or more secondaries")

        ##############################

        # Case 2: Primaries on both shards go down
        kill_mongo_proc(self.shard1_prim_p, destroy=False)
        kill_mongo_proc(self.shard2_prim_p, destroy=False)

        # Wait for the secondaries to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        shard2_secondary_admin = self.shard2_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])
        assert_soon(
            lambda: shard2_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document on each shard. These will be rolled back later
        retry_until_ok(db_main.insert, {"i": 1})
        self.assertEqual(db_secondary1.count(), 2)
        retry_until_ok(db_main.insert, {"i": 1001})
        self.assertEqual(db_secondary2.count(), 2)

        # Wait for replication on the doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 4
        assert_soon(c, "not all writes were replicated to doc manager")

        # Kill the new primaries
        kill_mongo_proc(self.shard1_scnd_p, destroy=False)
        kill_mongo_proc(self.shard2_scnd_p, destroy=False)

        # Start the servers back up...
        # Shard 1
        restart_mongo_proc(self.shard1_prim_p)
        c = lambda: self.shard1_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        restart_mongo_proc(self.shard1_scnd_p)
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        # Shard 2
        restart_mongo_proc(self.shard2_prim_p)
        c = lambda: self.shard2_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        restart_mongo_proc(self.shard2_scnd_p)
        secondary_admin = self.shard2_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)

        # Wait for the shards to come online
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)
        query2 = {"i": {"$gte": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query2).count) > 0)

        # Only first documents should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)
        self.assertEqual(db_main.find(query2).count(), 1)
        self.assertEqual(db_main.find_one(query2)["i"], 1000)

        # Same should hold for the doc manager
        i_values = [d["i"] for d in self.opman1.doc_managers[0]._search()]
        self.assertEqual(len(i_values), 2)
        self.assertIn(0, i_values)
        self.assertIn(1000, i_values)

    def test_with_chunk_migration(self):
        """Test that DocManagers have proper state after both a successful
        and an unsuccessful chunk migration
        """

        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        for i in range(1000):
            collection.insert({"i": i + 500})
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Test successful chunk move from shard 1 to shard 2
        self.mongos_conn["admin"].command(
            "moveChunk",
            "test.mcsharded",
            find={"i": 1},
            to="demo-set-1"
        )

        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

        # Mark the collection as "dropped". This will cause migration to fail.
        self.mongos_conn["config"]["collections"].update(
            {"_id": "test.mcsharded"},
            {"$set": {"dropped": True}}
        )

        # Test unsuccessful chunk move from shard 2 to shard 1
        def fail_to_move_chunk():
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to="demo-set-0"
            )
        self.assertRaises(pymongo.errors.OperationFailure, fail_to_move_chunk)
        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

    def test_with_orphan_documents(self):
        """Test that DocManagers have proper state after a chunk migration
        that resuts in orphaned documents.
        """
        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": i + 500} for i in range(1000))
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Stop replication using the 'rsSyncApplyStop' failpoint
        self.shard1_conn.admin.command(
            "configureFailPoint", "rsSyncApplyStop",
            mode="alwaysOn"
        )

        # Move a chunk from shard2 to shard1
        def move_chunk():
            try:
                self.mongos_conn["admin"].command(
                    "moveChunk",
                    "test.mcsharded",
                    find={"i": 1000},
                    to="demo-set-0"
                )
            except pymongo.errors.OperationFailure:
                pass

        # moveChunk will never complete, so use another thread to continue
        mover = threading.Thread(target=move_chunk)
        mover.start()

        # wait for documents to start moving to shard 1
        assert_soon(lambda: self.shard1_conn.test.mcsharded.count() > 500)

        # Get opid for moveChunk command
        operations = self.mongos_conn.test.current_op()
        opid = None
        for op in operations["inprog"]:
            if op.get("query", {}).get("moveChunk"):
                opid = op["opid"]
        self.assertNotEqual(opid, None, "could not find moveChunk operation")
        # Kill moveChunk with the opid
        self.mongos_conn["test"]["$cmd.sys.killop"].find_one({"op": opid})

        # Mongo Connector should not become confused by unsuccessful chunk move
        docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(docs), 1000)
        self.assertEqual(sorted(d["i"] for d in docs),
                         list(range(500, 1500)))

        # cleanup
        mover.join()
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """

    def setUp(self):
        _, _, self.primary_p = start_replica_set("test-oplog-manager")
        self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
        self.oplog_coll = self.primary_conn.local["oplog.rs"]
        self.opman = OplogThread(
            primary_conn=self.primary_conn,
            main_address="%s:%d" % (mongo_host, self.primary_p),
            oplog_coll=self.oplog_coll,
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=None,
            auth_key=None,
            auth_username=None,
            repl_set="test-oplog-manager",
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            pass  # OplogThread may not have been started
        self.primary_conn.close()
        kill_replica_set("test-oplog-manager")

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # timestamp is None - all oplog entries are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(cursor.count(), self.primary_conn["local"]["oplog.rs"].count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)["o"]["_id"]
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert({"i": i} for i in range(2, 1002))
        oplog_cursor = self.oplog_coll.find(sort=[("ts", pymongo.ASCENDING)])

        # startup + insert + 1000 inserts
        self.assertEqual(oplog_cursor.count(), 2 + 1000)
        pivot = oplog_cursor.skip(400).limit(1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 2 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{"a": i} for i in range(100)]
        for i in range(50, 60):
            docs[i]["_upsert_exception"] = True
        self.primary_conn["test"]["test"].insert(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        docs.sort()

        self.assertEqual(len(docs), 90)
        for doc, correct_a in zip(docs, range(0, 50) + range(60, 100)):
            self.assertEquals(doc["a"], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc["op"] == "n" for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        collection = self.primary_conn["test"]["test"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], entry[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman.checkpoint)

    def test_filter_fields(self):
        docman = self.opman.doc_managers[0]
        conn = self.opman.main_connection

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        db = conn["test"]["test"]
        db.insert(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {"test.test1": "test.test1_dest", "test.test2": "test.test2_dest"}
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update({"_id": 1}, {"$set": {"weakness": "kryptonite"}})

            def update_complete():
                docs = docman._search()
                for d in docs:
                    if d.get("weakness") == "kryptonite":
                        return True
                    return False

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].remove({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].remove()
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update({"_id": 1}, {"$set": {"weakness": "kryptonite"}})
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert({"name": "kermit", "color": "green"})
        self.primary_conn["test"]["test"].insert({"name": "elmo", "color": "firetruck red"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets",
        )

        self.primary_conn["test"]["test"].remove({"name": "elmo"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets",
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_filter_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}
        update_op = lambda: {"op": "u", "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}}, "o2": {"_id": 1}}

        # Case 0: insert op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        self.opman.fields = ["a", "b"]
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.fields = ["d", "e", "f"]
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        self.opman.fields = ["a", "c"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        self.opman.fields = ["c"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        self.opman.fields = ["a", "b"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        self.opman.fields = ["d", "e", "f"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, None)
class TestOplogManagerSharded(unittest.TestCase):
    """Defines all test cases for OplogThreads running on a sharded
    cluster
    """

    def setUp(self):
        """ Initialize the cluster:

        Clean out the databases used by the tests
        Make connections to mongos, mongods
        Create and shard test collections
        Create OplogThreads
        """
        self.cluster = ShardedCluster().start()

        # Connection to mongos
        self.mongos_conn = self.cluster.client()

        # Connections to the shards
        self.shard1_conn = self.cluster.shards[0].client()
        self.shard2_conn = self.cluster.shards[1].client()
        self.shard1_secondary_conn = self.cluster.shards[0].secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED)
        self.shard2_secondary_conn = self.cluster.shards[1].secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].create_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        self.mongos_conn.admin.command(bson.SON([
            ("split", "test.mcsharded"),
            ("middle", {"i": 1000})
        ]))

        # disable the balancer
        self.mongos_conn.config.settings.update_one(
            {"_id": "balancer"},
            {"$set": {"stopped": True}},
            upsert=True
        )

        # Move chunks to their proper places
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to='demo-set-0'
            )
        except pymongo.errors.OperationFailure:
            pass
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1000},
                to='demo-set-1'
            )
        except pymongo.errors.OperationFailure:
            pass

        # Make sure chunks are distributed correctly
        self.mongos_conn["test"]["mcsharded"].insert_one({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert_one({"i": 1000})

        def chunks_moved():
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000
        assert_soon(chunks_moved, max_tries=120,
                    message='chunks not moved? doc1=%r, doc2=%r' % (
                        self.shard1_conn.test.mcsharded.find_one(),
                        self.shard2_conn.test.mcsharded.find_one()))
        self.mongos_conn.test.mcsharded.delete_many({})

        # create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Oplog threads (oplog manager) for each shard
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(doc_manager,),
            oplog_progress_dict=oplog_progress,
            ns_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn
        )
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(doc_manager,),
            oplog_progress_dict=oplog_progress,
            ns_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn
        )

    def tearDown(self):
        try:
            self.opman1.join()
        except RuntimeError:
            pass                # thread may not have been started
        try:
            self.opman2.join()
        except RuntimeError:
            pass                # thread may not have been started
        close_client(self.mongos_conn)
        close_client(self.shard1_conn)
        close_client(self.shard2_conn)
        close_client(self.shard1_secondary_conn)
        close_client(self.shard2_secondary_conn)
        self.cluster.stop()

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # timestamp = None
        cursor1 = self.opman1.get_oplog_cursor(None)
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.system')}})
        self.assertEqual(list(cursor1), list(oplog1))

        cursor2 = self.opman2.get_oplog_cursor(None)
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.system')}})
        self.assertEqual(list(cursor2), list(oplog2))

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert_one(doc)
        latest_timestamp = self.opman1.get_last_oplog_timestamp()
        cursor = self.opman1.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = cursor[0]['o']['_id']
        retrieved = self.mongos_conn.test.mcsharded.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        for i in range(2, 2002):
            self.mongos_conn["test"]["mcsharded"].insert_one({
                "i": i
            })
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)]
        )

        # oplogs should have records for inserts performed, plus
        # various other messages
        oplog1_count = oplog1.count()
        oplog2_count = oplog2.count()
        self.assertGreaterEqual(oplog1_count, 998)
        self.assertGreaterEqual(oplog2_count, 1002)
        pivot1 = oplog1.skip(400).limit(-1)[0]
        pivot2 = oplog2.skip(400).limit(-1)[0]

        cursor1 = self.opman1.get_oplog_cursor(pivot1["ts"])
        cursor2 = self.opman2.get_oplog_cursor(pivot2["ts"])
        self.assertEqual(cursor1.count(), oplog1_count - 400)
        self.assertEqual(cursor2.count(), oplog2_count - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(), None)
        self.assertEqual(self.opman2.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert_one({
                "i": i + 500
            })
        oplog1 = self.shard1_conn["local"]["oplog.rs"]
        oplog1 = oplog1.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        oplog2 = self.shard2_conn["local"]["oplog.rs"]
        oplog2 = oplog2.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(),
                         oplog1["ts"])
        self.assertEqual(self.opman2.get_last_oplog_timestamp(),
                         oplog2["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        last_ts1 = self.opman1.dump_collection()
        last_ts2 = self.opman2.dump_collection()
        self.assertEqual(last_ts1, None)
        self.assertEqual(last_ts2, None)

        # Test with non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert_one({
                "i": i + 500
            })
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(last_ts1, self.opman1.dump_collection())
        self.assertEqual(last_ts2, self.opman2.dump_collection())
        self.assertEqual(len(self.opman1.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman1.init_cursor()[0]))
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman2.init_cursor()[0]))
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = self.opman2.collection_dump = True

        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, None)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, no collection dump, stuff in oplog
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.collection_dump = self.opman2.collection_dump = False
        collection.insert_one({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.opman1.init_cursor()
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        cursor, cursor_len = self.opman2.init_cursor()
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 1200)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)], last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        entry1 = list(
            self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        entry2 = list(
            self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1, cursor_len1 = self.opman1.init_cursor()
        cursor2, cursor_len2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
        progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman1.checkpoint)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman2.checkpoint)

    def test_rollback(self):
        """Test the rollback method in a sharded environment

        Cases:
        1. Documents on both shards, rollback on one shard
        2. Documents on both shards, rollback on both shards

        """

        self.opman1.start()
        self.opman2.start()

        # Insert first documents while primaries are up
        db_main = self.mongos_conn["test"]["mcsharded"]
        db_main2 = db_main.with_options(write_concern=WriteConcern(w=2))
        db_main2.insert_one({"i": 0})
        db_main2.insert_one({"i": 1000})
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].count(), 1)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].count(), 1)

        # Case 1: only one primary goes down, shard1 in this case
        self.cluster.shards[0].primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        def cond():
            try:
                db_main.insert_one({"i": 1})
            except:
                pass
            return db_main.find_one({"i": 1})
        retry_until_ok(cond)
        db_secondary1 = self.shard1_secondary_conn["test"]["mcsharded"]
        db_secondary2 = self.shard2_secondary_conn["test"]["mcsharded"]
        self.assertEqual(db_secondary1.count(), 2)

        # Wait for replication on the doc manager
        # Note that both OplogThreads share the same doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 3
        assert_soon(c, "not all writes were replicated to doc manager",
                    max_tries=120)

        # Kill the new primary
        self.cluster.shards[0].secondary.stop(destroy=False)

        # Start both servers back up
        self.cluster.shards[0].primary.start()
        primary_admin = self.shard1_conn["admin"]
        c = lambda: primary_admin.command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[0].secondary.start()
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        query = {"i": {"$lt": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)

        # Only first document should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)

        def check_docman_rollback():
            docman_docs = [d for d in self.opman1.doc_managers[0]._search()
                           if d["i"] < 1000]
            return len(docman_docs) == 1 and docman_docs[0]["i"] == 0
        assert_soon(check_docman_rollback,
                    "doc manager did not roll back")

        # Wait for previous rollback to complete.
        # Insert/delete one document to jump-start replication to secondaries
        # in MongoDB 3.x.
        db_main.insert_one({'i': -1})
        db_main.delete_one({'i': -1})

        def rollback_done():
            secondary1_count = retry_until_ok(db_secondary1.count)
            secondary2_count = retry_until_ok(db_secondary2.count)
            return (1, 1) == (secondary1_count, secondary2_count)
        assert_soon(rollback_done,
                    "rollback never replicated to one or more secondaries")

        ##############################

        # Case 2: Primaries on both shards go down
        self.cluster.shards[0].primary.stop(destroy=False)
        self.cluster.shards[1].primary.stop(destroy=False)

        # Wait for the secondaries to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        shard2_secondary_admin = self.shard2_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])
        assert_soon(
            lambda: shard2_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document on each shard. These will be rolled back later
        retry_until_ok(db_main.insert_one, {"i": 1})
        self.assertEqual(db_secondary1.count(), 2)
        retry_until_ok(db_main.insert_one, {"i": 1001})
        self.assertEqual(db_secondary2.count(), 2)

        # Wait for replication on the doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 4
        assert_soon(c, "not all writes were replicated to doc manager")

        # Kill the new primaries
        self.cluster.shards[0].secondary.stop(destroy=False)
        self.cluster.shards[1].secondary.stop(destroy=False)

        # Start the servers back up...
        # Shard 1
        self.cluster.shards[0].primary.start()
        c = lambda: self.shard1_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[0].secondary.start()
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        # Shard 2
        self.cluster.shards[1].primary.start()
        c = lambda: self.shard2_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[1].secondary.start()
        secondary_admin = self.shard2_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)

        # Wait for the shards to come online
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)
        query2 = {"i": {"$gte": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query2).count) > 0)

        # Only first documents should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)
        self.assertEqual(db_main.find(query2).count(), 1)
        self.assertEqual(db_main.find_one(query2)["i"], 1000)

        # Same should hold for the doc manager
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 2)
        i_values = [d["i"] for d in self.opman1.doc_managers[0]._search()]
        self.assertIn(0, i_values)
        self.assertIn(1000, i_values)

    def test_with_chunk_migration(self):
        """Test that DocManagers have proper state after both a successful
        and an unsuccessful chunk migration
        """

        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Test successful chunk move from shard 1 to shard 2
        self.mongos_conn["admin"].command(
            "moveChunk",
            "test.mcsharded",
            find={"i": 1},
            to="demo-set-1"
        )

        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

        # Mark the collection as "dropped". This will cause migration to fail.
        self.mongos_conn["config"]["collections"].update_one(
            {"_id": "test.mcsharded"},
            {"$set": {"dropped": True}}
        )

        # Test unsuccessful chunk move from shard 2 to shard 1
        def fail_to_move_chunk():
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to="demo-set-0"
            )
        self.assertRaises(pymongo.errors.OperationFailure, fail_to_move_chunk)
        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

    def test_with_orphan_documents(self):
        """Test that DocManagers have proper state after a chunk migration
        that resuts in orphaned documents.
        """
        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert_many([{"i": i + 500} for i in range(1000)])
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Stop replication using the 'rsSyncApplyStop' failpoint
        self.shard1_conn.admin.command(
            "configureFailPoint", "rsSyncApplyStop",
            mode="alwaysOn"
        )

        # Move a chunk from shard2 to shard1
        def move_chunk():
            try:
                self.mongos_conn["admin"].command(
                    "moveChunk",
                    "test.mcsharded",
                    find={"i": 1000},
                    to="demo-set-0"
                )
            except pymongo.errors.OperationFailure:
                pass

        # moveChunk will never complete, so use another thread to continue
        mover = threading.Thread(target=move_chunk)
        mover.start()

        # wait for documents to start moving to shard 1
        assert_soon(lambda: self.shard1_conn.test.mcsharded.count() > 500)

        # Get opid for moveChunk command
        operations = self.mongos_conn.test.current_op()
        opid = None
        for op in operations["inprog"]:
            if op.get("query", {}).get("moveChunk"):
                opid = op["opid"]

        if opid is None:
            raise SkipTest("could not find moveChunk operation, cannot test "
                           "failed moveChunk")
        # Kill moveChunk with the opid

        if self.mongos_conn.server_info()['versionArray'][:3] >= [3, 1, 2]:
            self.mongos_conn.admin.command('killOp', op=opid)
        else:
            self.mongos_conn["test"]["$cmd.sys.killop"].find_one({"op": opid})

        # Mongo Connector should not become confused by unsuccessful chunk move
        docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(docs), 1000)
        self.assertEqual(sorted(d["i"] for d in docs),
                         list(range(500, 1500)))
        self.shard1_conn.admin.command(
            "configureFailPoint", "rsSyncApplyStop",
            mode="off"
        )
        # cleanup
        mover.join()
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """

    def setUp(self):
        self.repl_set = ReplicaSetSingle().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local["oplog.rs"]
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(
                namespace_options={"test.*": True, "gridfs.*": {"gridfs": True}}
            ),
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            pass  # OplogThread may not have been started
        self.primary_conn.drop_database("test")
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # timestamp is None - all oplog entries excluding no-ops are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(
            cursor.count(),
            self.primary_conn["local"]["oplog.rs"].find({"op": {"$ne": "n"}}).count(),
        )

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)["o"]["_id"]
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert_many(
            [{"i": i} for i in range(2, 1002)]
        )
        oplog_cursor = self.oplog_coll.find(
            {"op": {"$ne": "n"}, "ns": {"$not": re.compile(r"\.(system|\$cmd)")}},
            sort=[("ts", pymongo.ASCENDING)],
        )

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 1 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 1 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog, with gridfs collections
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog with gridfs collections
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        # Insert 10 gridfs files
        for i in range(10):
            fs = gridfs.GridFS(self.primary_conn["gridfs"], collection="test" + str(i))
            fs.put(b"hello world")
        # Insert 1000 documents
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": i + 500})
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(namespace_set=["test.test"]),
        )
        # Insert a document into an included collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-included collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many(
                [{"test": "1" * 1024} for _ in range(1024)]
            )
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()

    def test_skipped_oplog_entry_updates_checkpoint(self):
        repl_set = ReplicaSetSingle().start()
        conn = repl_set.client()
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(namespace_set=["test.test"]),
        )
        opman.start()

        # Insert a document into an included collection
        conn["test"]["test"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        assert_soon(
            lambda: last_ts == opman.checkpoint,
            "OplogThread never updated checkpoint to non-skipped " "entry.",
        )
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)

        # Make sure that the oplog thread updates its checkpoint on every
        # oplog entry.
        conn["test"]["ignored"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        assert_soon(
            lambda: last_ts == opman.checkpoint,
            "OplogThread never updated checkpoint to skipped entry.",
        )
        opman.join()
        conn.close()
        repl_set.stop()

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{"a": i} for i in range(100)]
        for i in range(50, 60):
            docs[i]["_upsert_exception"] = True
        self.primary_conn["test"]["test"].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        docs.sort(key=lambda doc: doc["a"])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc["a"], correct_a)

    def test_dump_collection_cancel(self):
        """Test that dump_collection returns None when cancelled."""
        self.primary_conn["test"]["test"].insert_one({"test": "1"})

        # Pretend that the OplogThead was cancelled
        self.opman.running = False
        self.assertIsNone(self.opman.dump_collection())

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc["op"] == "n" for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertTrue(cursor_empty)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        collection = self.primary_conn["test"]["test"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"i": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        for doc in cursor:
            last_doc = doc
        self.assertEqual(last_doc["o"]["i"], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists, no collection dump, something in oplog
        collection.insert_many([{"i": i + 500} for i in range(1000)])
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertEqual(cursor, None)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {
            "test.test1": "test.test1_dest",
            "test.test2": "test.test2_dest",
        }
        self.opman.namespace_config = NamespaceConfig(
            namespace_set=source_ns, namespace_options=dest_mapping
        )
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {"weakness": "kryptonite"}}
            )

            def update_complete():
                docs = docman._search()
                for d in docs:
                    if d.get("weakness") == "kryptonite":
                        return True
                    return False

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert_one(
            {"name": "kermit", "color": "green"}
        )
        self.primary_conn["test"]["test"].insert_one(
            {"name": "elmo", "color": "firetruck red"}
        )

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets",
        )

        self.primary_conn["test"]["test"].delete_one({"name": "elmo"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets",
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_upgrade_oplog_progress(self):
        first_oplog_ts = self.opman.oplog.find_one()["ts"]
        # Old format oplog progress file:
        progress = {str(self.opman.oplog): bson_ts_to_long(first_oplog_ts)}
        # Set up oplog managers to use the old format.
        oplog_progress = LockingDict()
        oplog_progress.dict = progress
        self.opman.oplog_progress = oplog_progress
        # Cause the oplog managers to update their checkpoints.
        self.opman.update_checkpoint(first_oplog_ts)
        # New format should be in place now.
        new_format = {self.opman.replset_name: first_oplog_ts}
        self.assertEqual(new_format, self.opman.oplog_progress.get_dict())
class TestFilterFields(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        self.dest_mapping_stru = DestMapping([], [], {})
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def _check_fields(self, opman, fields, exclude_fields, projection):
        if fields:
            self.assertEqual(sorted(opman.fields), sorted(fields))
            self.assertEqual(opman._fields, set(fields))
        else:
            self.assertEqual(opman.fields, None)
            self.assertEqual(opman._fields, set([]))
        if exclude_fields:
            self.assertEqual(sorted(opman.exclude_fields),
                             sorted(exclude_fields))
            self.assertEqual(opman._exclude_fields, set(exclude_fields))
        else:
            self.assertEqual(opman.exclude_fields, None)
            self.assertEqual(opman._exclude_fields, set([]))

        self.assertEqual(opman._projection, projection)

    def test_filter_fields(self):
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {
            "a": 1, "b": 2, "c": 3,
            "d": 4, "e": 5, "f": 6,
            "_id": 1
        }
        db = conn['test']['test']
        db.insert_one(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {
            "op": "i",
            "o": {
                "_id": 0,
                "a": 1,
                "b": 2,
                "c": 3
            }
        }
        update_op = lambda: {
            "op": "u",
            "o": {
                "$set": {
                    "a": 4,
                    "b": 5
                },
                "$unset": {
                    "c": True
                }
            },
            "o2": {
                "_id": 1
            }
        }

        # Case 0: insert op, no fields provided
        self.opman.exclude_fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert op, fields provided
        self.opman.exclude_fields = ['c']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.exclude_fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0})
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 3: update op, no fields provided
        self.opman.exclude_fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update op, fields provided
        self.opman.exclude_fields = ['b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'b': 0}, self.opman._projection)

        # Case 5: update op, fields provided, empty $set
        self.opman.exclude_fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'a': 0, 'b': 0}, self.opman._projection)

        # Case 6: update op, fields provided, empty $unset
        self.opman.exclude_fields = ['c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 7: update op, fields provided, entry is nullified
        self.opman.exclude_fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, None)
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 8: update op, fields provided, replacement
        self.opman.exclude_fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        })
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'d': 0, 'e': 0, 'f': 0}, self.opman._projection)

    def test_filter_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {
            "op": "i",
            "o": {
                "_id": 0,
                "a": 1,
                "b": 2,
                "c": 3
            }
        }
        update_op = lambda: {
            "op": "u",
            "o": {
                "$set": {
                    "a": 4,
                    "b": 5
                },
                "$unset": {
                    "c": True
                }
            },
            "o2": {
                "_id": 1
            }
        }

        # Case 0: insert op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert op, fields provided
        self.opman.fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection)

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0})
        self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1},
                         self.opman._projection)

        # Case 3: update op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update op, fields provided
        self.opman.fields = ['a', 'c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'_id': 1, 'a': 1, 'c': 1}, self.opman._projection)

        # Case 5: update op, fields provided, empty $set
        self.opman.fields = ['c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'_id': 1, 'c': 1}, self.opman._projection)

        # Case 6: update op, fields provided, empty $unset
        self.opman.fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection)

        # Case 7: update op, fields provided, entry is nullified
        self.opman.fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, None)
        self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1},
                         self.opman._projection)

        # Case 8: update op, fields provided, replacement
        self.opman.fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        })
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1, 'c': 1},
                         self.opman._projection)

    def test_exclude_fields_constructor(self):
        # Test with the "_id" field in exclude_fields
        exclude_fields = ["_id", "title", "content", "author"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            exclude_fields=exclude_fields
        )
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            exclude_fields=exclude_fields
        )
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = extra_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            exclude_fields=exclude_fields
        )
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

        # Test with nothing set for exclude_fields
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            exclude_fields=None
        )
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_fields_constructor(self):
        # Test with "_id" field in constructor
        fields = ["_id", "title", "content", "author"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=fields
        )
        self._check_fields(opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test without "_id" field in constructor
        fields = ["title", "content", "author"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=fields
        )
        fields.append('_id')
        self._check_fields(opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=fields
        )
        self._check_fields(opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru
        )
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_exclude_fields_attr(self):
        # Test with the "_id" field in exclude_fields.
        exclude_fields = ["_id", "title", "content", "author"]
        exclude_fields.remove('_id')
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = extra_fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

        # Test with nothing set for exclude_fields
        self.opman.exclude_fields = None
        self._check_fields(self.opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_fields_attr(self):
        # Test with "_id" field included in fields
        fields = ["_id", "title", "content", "author"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test without "_id" field included in fields
        fields = ["title", "content", "author"]
        self.opman.fields = fields
        fields.append('_id')
        self._check_fields(self.opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [],
                           dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        self.opman.fields = None
        self._check_fields(self.opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry(
            {'op': 'i',
             'o': dict((f, 1) for f in extra_fields)})['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_nested_fields(self):
        def check_nested(document, fields, filtered_document, op='i'):
            self.opman.fields = fields
            fields.append('_id')
            self.assertEqual(set(fields), self.opman._fields)
            self.assertEqual(sorted(fields), sorted(self.opman.fields))
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document})
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'name': 'Han Solo', 'a': {'b': {}}}
        fields = ['name', 'a.b.c']
        filtered_document = {'name': 'Han Solo'}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a', '-a.-b']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
                    '_id': 1}

        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        fields = ['a', '-a.-b']
        check_nested(document, fields, filtered_document)

        document = {'test': 1}
        fields = ['doesnt_exist']
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        fields = ['a.b', 'b.a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        fields = ['a.b']
        filtered_document = {'a': {'b': {'a': {'b': 1}}}}
        check_nested(document, fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        fields = ['name']
        filtered_document = {'name': 'anna'}
        check_nested(document, fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        fields = ['a', 'c']
        filtered_update = {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'c': 3}, 'a.e': 1}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        fields = ['a.b']
        filtered_update = {'$set': {'a.b.1': 1, 'a.b.2': 2}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        fields = ['a.b.e']
        filtered_update = None
        check_nested(update, fields, filtered_update, op='u')

    def test_nested_exclude_fields(self):
        def check_nested(document, exclude_fields, filtered_document, op='i'):
            self.opman.exclude_fields = exclude_fields
            if '_id' in exclude_fields:
                exclude_fields.remove('_id')
            self.assertEqual(set(exclude_fields), self.opman._exclude_fields)
            self.assertEqual(sorted(exclude_fields),
                             sorted(self.opman.exclude_fields))
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document})
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'a': {'b': {'c': {'d': 0, 'e': 1}}}}
        exclude_fields = ['a.b.c.d']
        filtered_document = {'a': {'b': {'c': {'e': 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}}
        exclude_fields = ['a.b.c.d.e.f']
        filtered_document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': 1}
        exclude_fields = ['a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2,
                             'c': {'g': 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        exclude_fields = ['a.b.c', 'a.e', '_id']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2, 'c': {'g': 1},
                             '_id': 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a', '-a.-b']
        filtered_document = {'-a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'_id': 1, 'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['test.doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        exclude_fields = ['a.b', 'b.a']
        filtered_document = {'a': {}, 'b': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        exclude_fields = ['a.b']
        filtered_document = {'a': {}, 'c': {'a': {'b': 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        exclude_fields = ['name']
        filtered_document = {'name_of_cat': 'pushkin'}
        check_nested(document, exclude_fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        exclude_fields = ['a', 'c']
        filtered_update = {'$set': {'b': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'d': 1}, 'a.f': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b.c', 'a.b.d', 'a.e']
        filtered_update = {'$set': {'a.b': {}}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        exclude_fields = ['a.b']
        filtered_update = {'$set': {'b': 3}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}}
        exclude_fields = ['a.b', 'd']
        filtered_update = {'$set': {'g': 456}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b', 'a.e']
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op='u')

    def test_fields_and_exclude(self):
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        # Test setting both to None in constructor
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=None,
            exclude_fields=None
        )
        self._check_fields(opman, [], [], None)
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=None,
            exclude_fields=exclude_fields
        )
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        # Test setting fields when exclude_fields is set
        self.assertRaises(
            errors.InvalidConfiguration, setattr, opman, "fields", fields)
        self.assertRaises(
            errors.InvalidConfiguration, setattr, opman, "fields", None)
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            exclude_fields=None,
            fields=fields
        )
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", exclude_fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", None)
        self.assertRaises(
            errors.InvalidConfiguration, OplogThread,
            self.primary_conn,
            (DocManager(),),
            LockingDict(),
            self.dest_mapping_stru,
            fields=fields,
            exclude_fields=exclude_fields)
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """

    def setUp(self):
        _, _, self.primary_p = start_replica_set('test-oplog-manager')
        self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
        self.oplog_coll = self.primary_conn.local['oplog.rs']
        self.opman = OplogThread(
            primary_conn=self.primary_conn,
            main_address='%s:%d' % (mongo_host, self.primary_p),
            oplog_coll=self.oplog_coll,
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=None,
            auth_key=None,
            auth_username=None,
            repl_set='test-oplog-manager'
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            pass                # OplogThread may not have been started
        self.primary_conn.close()
        kill_replica_set('test-oplog-manager')

    def test_retrieve_doc(self):
        """ Test the retrieve_doc method """

        # Trivial case where the oplog entry is None
        self.assertEqual(self.opman.retrieve_doc(None), None)

        # Retrieve a document from insert operation in oplog
        doc = {"name": "mango", "type": "fruit",
               "ns": "test.test", "weight": 3.24, "i": 1}
        self.primary_conn["test"]["test"].insert(doc)
        oplog_entries = self.primary_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        oplog_entry = next(oplog_entries)
        self.assertEqual(self.opman.retrieve_doc(oplog_entry), doc)

        # Retrieve a document from update operation in oplog
        self.primary_conn["test"]["test"].update(
            {"i": 1},
            {"$set": {"sounds-like": "mongo"}}
        )
        oplog_entries = self.primary_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        doc["sounds-like"] = "mongo"
        self.assertEqual(self.opman.retrieve_doc(next(oplog_entries)), doc)

        # Retrieve a document from remove operation in oplog
        # (expected: None)
        self.primary_conn["test"]["test"].remove({
            "i": 1
        })
        oplog_entries = self.primary_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.DESCENDING)],
            limit=1
        )
        self.assertEqual(self.opman.retrieve_doc(next(oplog_entries)), None)

        # Retrieve a document with bad _id
        # (expected: None)
        oplog_entry["o"]["_id"] = "ThisIsNotAnId123456789"
        self.assertEqual(self.opman.retrieve_doc(oplog_entry), None)

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''

        # Trivial case: timestamp is None
        self.assertEqual(self.opman.get_oplog_cursor(None), None)

        # earliest entry is after given timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert(doc)
        self.assertEqual(self.opman.get_oplog_cursor(
            bson.Timestamp(1, 0)), None)

        # earliest entry is the only one at/after timestamp
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        self.assertEqual(self.opman.retrieve_doc(next(cursor)), doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert(
            {"i": i} for i in range(2, 1002))
        oplog_cursor = self.oplog_coll.find(
            sort=[("ts", pymongo.ASCENDING)]
        )

        # startup + insert + 1000 inserts
        self.assertEqual(oplog_cursor.count(), 2 + 1000)
        pivot = oplog_cursor.skip(400).limit(1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 2 + 1000 - 400)

        # get_oplog_cursor fast-forwards *one doc beyond* the given timestamp
        doc = self.primary_conn["test"]["test"].find_one(
            {"_id": next(goc_cursor)["o"]["_id"]})
        self.assertEqual(doc["i"], self.opman.retrieve_doc(pivot)["i"] + 1)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({
                "i": i + 500
            })
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(),
                         oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. last checkpoint exists
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertEqual(self.opman.init_cursor(), None)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        self.assertEqual(self.opman.init_cursor(), None)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["test"]["test"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman.init_cursor())["ts"], last_ts)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, non-empty collections, stuff in oplog
        self.opman.oplog_progress = LockingDict()
        self.assertEqual(next(self.opman.init_cursor())["ts"], last_ts)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry = list(
            self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor = self.opman.init_cursor()
        self.assertEqual(entry[1]["ts"], next(cursor)["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)],
                             entry[0]["ts"])

    def test_filter_fields(self):
        docman = self.opman.doc_managers[0]
        conn = self.opman.main_connection

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {
            "a": 1, "b": 2, "c": 3,
            "d": 4, "e": 5, "f": 6,
            "_id": 1
        }
        db = conn['test']['test']
        db.insert(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {"test.test1": "test.test1_dest",
                        "test.test2": "test.test2_dest"}
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )

            def update_complete():
                docs = docman._search()
                for d in docs:
                    if d.get("weakness") == "kryptonite":
                        return True
                    return False
            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].remove({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search()
                   if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].remove()
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["test"]["test"].insert({
            "name": "elmo",
            "color": "firetruck red"
        })

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets"
        )

        self.primary_conn["test"]["test"].remove({"name": "elmo"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets"
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """

    def setUp(self):
        self.repl_set = ReplicaSetSingle().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']

    def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
        if include_ns is None:
            include_ns = []
        if exclude_ns is None:
            exclude_ns = []
        if dest_mapping is None:
            dest_mapping = {}

        # include_ns must not exist together with exclude_ns
        # dest_mapping must exist together with include_ns
        # those checks have been tested in test_config.py so we skip that here.

        self.dest_mapping_stru = DestMapping(include_ns, exclude_ns,
                                             dest_mapping)
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            ns_set=include_ns,
            ex_ns_set=exclude_ns
        )

    def init_dbs(self):
        # includedb1.* & includedb2.includecol1 are interested collections
        self.primary_conn["includedb1"]["includecol1"].insert_many(
            [{"idb1col1": i} for i in range(1, 3)])

        self.primary_conn["includedb1"]["includecol2"].insert_many(
            [{"idb1col2": i} for i in range(1, 3)])

        self.primary_conn["includedb2"]["includecol1"].insert_many(
            [{"idb2col1": i} for i in range(1, 3)])

        # the others are not interested collections
        self.primary_conn["includedb2"]["excludecol2"].insert_many(
            [{"idb2col2": i} for i in range(1, 3)])

        self.primary_conn["excludedb3"]["excludecol1"].insert_many(
            [{"idb3col1": i} for i in range(1, 3)])

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            pass                # OplogThread may not have been started

        for db in self.primary_conn.database_names():
            if db != "local":
                self.primary_conn.drop_database(db)
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''

        # Put something in the dbs
        self.init_dbs()

        # timestamp is None - all oplog entries excluding no-ops are returned.
        # wildcard include case no impact the result
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

        got_cursor = self.opman.get_oplog_cursor(None)
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'}})
        self.assertNotEqual(got_cursor, None)
        self.assertEqual(got_cursor.count(), oplog_cursor.count())

        # wildcard exclude case no impact the result
        self.reset_opman([], ["includedb2.excludecol2", "excludedb3.*"], {})

        got_cursor = self.opman.get_oplog_cursor(None)
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'}})
        self.assertNotEqual(got_cursor, None)
        self.assertEqual(got_cursor.count(), oplog_cursor.count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "idb1col1": 1}
        self.primary_conn["includedb1"]["includecol1"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)['o']['_id']
        retrieved = self.primary_conn.includedb1.includecol1.find_one(
                    next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["includedb1"]["includecol1"].insert_many(
            [{"idb1col1": i} for i in range(2, 1002)])
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.(system|\$cmd)')}},
            sort=[("ts", pymongo.ASCENDING)])

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 11 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 11 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # empty oplog case has been tested in test_oplog_manager.py,
        # skip that here.

        # Put something in the dbs
        self.init_dbs()

        # Test non-empty oplog
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["includedb1"]["includecol1"].insert_one({
                "idb1col1": i + 500
            })
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find(
                {'op': {'$ne': 'n'}}).sort(
                "$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(),
                         oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. no namespace set is set
        2. include namespace set is set
        3. exclude namespace set is set

        empty oplog case has been tested in test_oplog_manager.py,
        skip that here.
        """

        # Put something in the dbs
        self.init_dbs()

        # no namespace set is set
        self.reset_opman([], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 10)

        # include namespace set is set
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 6)

        # exclude namespace set is set
        self.reset_opman([], ["includedb2.excludecol2", "excludedb3.*"], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 6)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['includedb1']['includecol3'].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        docs = list(filter(lambda doc: 'a' in doc, docs))
        docs.sort(key=lambda doc: doc['a'])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["includedb1"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertTrue(cursor_empty)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["includedb1"]["includecol1"]
        collection.insert_one({"idb1col1": 1})
        collection.delete_one({"idb1col1": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"idb1col1": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        for doc in cursor:
            last_doc = doc
        self.assertEqual(last_doc['o']['idb1col1'], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists
        collection.insert_many([{"idb1col1": i + 500} for i in range(1000)])
        entry = list(
            self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertEqual(cursor, None)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns_wildcard = ["includedb1.*", "includedb2.includecol1"]
        source_ns = ["includedb1.includecol1",
                     "includedb1.includecol2",
                     "includedb2.includecol1"]
        phony_ns = ["includedb2.excludecol2", "excludedb3.excludecol1"]
        dest_mapping = {
                        "includedb1.*": "newdb1_*.bar",
                        "includedb2.includecol1": "newdb2.newcol1"
                        }
        self.reset_opman(source_ns_wildcard, [], dest_mapping)
        docman = self.opman.doc_managers[0]
        dest_mapping_stru = self.opman.dest_mapping_stru

        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )

            def update_complete():
                docs = docman._search()
                for d in docs:
                    if d.get("weakness") == "kryptonite":
                        return True
                    return False
            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search()
                   if d["ns"] == dest_mapping_stru.get(ns)]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """

        self.reset_opman(["includedb1.*"], [], {})
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["includedb1"]["includecol1"].insert_one({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["includedb1"]["includecol2"].insert_one({
            "name": "elmo",
            "color": "firetruck red"
        })
        self.primary_conn["excludedb2"]["excludecol1"].insert_one({
            "name": "panda",
            "color": "white and black"
        })

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets"
        )

        self.primary_conn["includedb1"]["includecol2"].delete_one({
            "name": "elmo"
        })

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets"
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """

    def setUp(self):
        self.repl_set = ReplicaSet().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict()
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            pass                # OplogThread may not have been started
        self.primary_conn.drop_database("test")
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''

        # timestamp is None - all oplog entries are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(cursor.count(),
                         self.primary_conn["local"]["oplog.rs"].count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)['o']['_id']
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert_many(
            [{"i": i} for i in range(2, 1002)])
        oplog_cursor = self.oplog_coll.find(
            {'op': {'$ne': 'n'},
             'ns': {'$not': re.compile(r'\.(system|\$cmd)')}},
            sort=[("ts", pymongo.ASCENDING)]
        )

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 1 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 1 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(),
                         oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['test']['test'].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        docs.sort(key=lambda doc: doc['a'])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertTrue(cursor_empty)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["test"]["test"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[self.opman.replset_name],
                             last_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        for doc in cursor:
            last_doc = doc
        self.assertEqual(last_doc['o']['i'], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        entry = list(
            self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        progress.get_dict()[self.opman.replset_name] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[self.opman.replset_name],
                             entry[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[self.opman.replset_name] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman.checkpoint)

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {"test.test1": "test.test1_dest",
                        "test.test2": "test.test2_dest"}
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )

            def update_complete():
                docs = docman._search()
                for d in docs:
                    if d.get("weakness") == "kryptonite":
                        return True
                    return False
            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search()
                   if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1},
                {"$set": {"weakness": "kryptonite"}}
            )
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert_one({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["test"]["test"].insert_one({
            "name": "elmo",
            "color": "firetruck red"
        })

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets"
        )

        self.primary_conn["test"]["test"].delete_one({"name": "elmo"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets"
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_upgrade_oplog_progress(self):
        first_oplog_ts = self.opman.oplog.find_one()['ts']
        # Old format oplog progress file:
        progress = {str(self.opman.oplog): bson_ts_to_long(first_oplog_ts)}
        # Set up oplog managers to use the old format.
        oplog_progress = LockingDict()
        oplog_progress.dict = progress
        self.opman.oplog_progress = oplog_progress
        # Cause the oplog managers to update their checkpoints.
        self.opman.checkpoint = first_oplog_ts
        self.opman.update_checkpoint()
        # New format should be in place now.
        new_format = {self.opman.replset_name: first_oplog_ts}
        self.assertEqual(
            new_format,
            self.opman.oplog_progress.get_dict()
        )
class TestFilterFields(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local["oplog.rs"]

    @classmethod
    def tearDownClass(cls):
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        self.namespace_config = NamespaceConfig()
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=self.namespace_config,
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        self.opman.namespace_config = NamespaceConfig(include_fields=fields)

    def reset_exclude_fields(self, fields):
        self.opman.namespace_config = NamespaceConfig(exclude_fields=fields)

    def test_filter_fields(self):
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.reset_include_fields(include_fields)
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        db = conn["test"]["test"]
        db.insert_one(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def filter_doc(document, fields):
            if fields and "_id" in fields:
                fields.remove("_id")
            return self.opman.filter_oplog_entry(document, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ["c"])
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ["a", "b", "c"])
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ["b"])
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ["a", "b"])
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ["c"])
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ["a", "b", "c"])
        self.assertEqual(filtered, None)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc(
            {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}, ["d", "e", "f"]
        )
        self.assertEqual(filtered, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_filter_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def filter_doc(document, fields):
            if fields and "_id" not in fields:
                fields.append("_id")
            return self.opman.filter_oplog_entry(document, include_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ["a", "b"])
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ["d", "e", "f"])
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ["a", "c"])
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ["c"])
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ["a", "b"])
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ["d", "e", "f"])
        self.assertEqual(filtered, None)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc(
            {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}, ["a", "b", "c"]
        )
        self.assertEqual(filtered, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_nested_fields(self):
        def check_nested(document, fields, filtered_document, op="i"):
            if "_id" not in fields:
                fields.append("_id")
            filtered_result = self.opman.filter_oplog_entry(
                {"op": op, "o": document}, include_fields=fields
            )
            if filtered_result is not None:
                filtered_result = filtered_result["o"]
            self.assertEqual(filtered_result, filtered_document)

        document = {"name": "Han Solo", "a": {"b": {}}}
        fields = ["name", "a.b.c"]
        filtered_document = {"name": "Han Solo"}
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}}
        fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"c": 2}, "e": 5}}
        check_nested(document, fields, filtered_document)

        document = {
            "a": {"b": {"c": 2, "e": 3}, "e": 5},
            "b": 2,
            "c": {"g": 1},
            "_id": 1,
        }
        fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"c": 2}, "e": 5}, "_id": 1}
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        fields = ["a.b", "-a"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        fields = ["a", "-a.-b"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}, "_id": 1}

        fields = ["a.b", "-a"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        fields = ["a", "-a.-b"]
        check_nested(document, fields, filtered_document)

        document = {"test": 1}
        fields = ["doesnt_exist"]
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": 1}, "b": {"a": 1}}
        fields = ["a.b", "b.a"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}}
        fields = ["a.b"]
        filtered_document = {"a": {"b": {"a": {"b": 1}}}}
        check_nested(document, fields, filtered_document)

        document = {"name": "anna", "name_of_cat": "pushkin"}
        fields = ["name"]
        filtered_document = {"name": "anna"}
        check_nested(document, fields, filtered_document)

        update = {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}}
        fields = ["a", "c"]
        filtered_update = {"$set": {"a.b": 1, "a.c": 3, "c": {"b": 3}}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}}
        fields = ["a.b.c", "a.e"]
        filtered_update = {"$set": {"a.b": {"c": 3}, "a.e": 1}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}}
        fields = ["a.b"]
        filtered_update = {"$set": {"a.b.1": 1, "a.b.2": 2}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}
        fields = ["a.b.e"]
        filtered_update = None
        check_nested(update, fields, filtered_update, op="u")

    def test_nested_exclude_fields(self):
        def check_nested(document, exclude_fields, filtered_document, op="i"):
            if "_id" in exclude_fields:
                exclude_fields.remove("_id")
            filtered_result = self.opman.filter_oplog_entry(
                {"op": op, "o": document}, exclude_fields=exclude_fields
            )
            if filtered_result is not None:
                filtered_result = filtered_result["o"]
            self.assertEqual(filtered_result, filtered_document)

        document = {"a": {"b": {"c": {"d": 0, "e": 1}}}}
        exclude_fields = ["a.b.c.d"]
        filtered_document = {"a": {"b": {"c": {"e": 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"-a": 0, "d": {"e": {"f": 1}}}}}}
        exclude_fields = ["a.b.c.d.e.f"]
        filtered_document = {"a": {"b": {"c": {"-a": 0, "d": {"e": {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": 1}
        exclude_fields = ["a"]
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}}
        exclude_fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {
            "a": {"b": {"c": 2, "e": 3}, "e": 5},
            "b": 2,
            "c": {"g": 1},
            "_id": 1,
        }
        exclude_fields = ["a.b.c", "a.e", "_id"]
        filtered_document = {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}, "_id": 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        exclude_fields = ["a.b", "-a"]
        filtered_document = {"a": {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        exclude_fields = ["a", "-a.-b"]
        filtered_document = {"-a": {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}, "_id": 1}
        exclude_fields = ["a.b", "-a"]
        filtered_document = {"_id": 1, "a": {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"test": 1}
        exclude_fields = ["doesnt_exist"]
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {"test": 1}
        exclude_fields = ["test.doesnt_exist"]
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": 1}, "b": {"a": 1}}
        exclude_fields = ["a.b", "b.a"]
        filtered_document = {"a": {}, "b": {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}}
        exclude_fields = ["a.b"]
        filtered_document = {"a": {}, "c": {"a": {"b": 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"name": "anna", "name_of_cat": "pushkin"}
        exclude_fields = ["name"]
        filtered_document = {"name_of_cat": "pushkin"}
        check_nested(document, exclude_fields, filtered_document)

        update = {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}}
        exclude_fields = ["a", "c"]
        filtered_update = {"$set": {"b": 2}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}}
        exclude_fields = ["a.b.c", "a.e"]
        filtered_update = {"$set": {"a.b": {"d": 1}, "a.f": 2}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}
        exclude_fields = ["a.b.c", "a.b.d", "a.e"]
        filtered_update = {"$set": {"a.b": {}}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}}
        exclude_fields = ["a.b"]
        filtered_update = {"$set": {"b": 3}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b.c": 42, "d.e.f": 123, "g": 456}}
        exclude_fields = ["a.b", "d"]
        filtered_update = {"$set": {"g": 456}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}
        exclude_fields = ["a.b", "a.e"]
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op="u")
Esempio n. 12
0
class TestFilterFields(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        self.namespace_config = NamespaceConfig()
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=self.namespace_config
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        self.opman.namespace_config = NamespaceConfig(include_fields=fields)

    def reset_exclude_fields(self, fields):
        self.opman.namespace_config = NamespaceConfig(exclude_fields=fields)

    def test_filter_fields(self):
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.reset_include_fields(include_fields)
        # Documents have more than just these fields
        doc = {
            "a": 1, "b": 2, "c": 3,
            "d": 4, "e": 5, "f": 6,
            "_id": 1
        }
        db = conn['test']['test']
        db.insert_one(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {
            "op": "i",
            "o": {
                "_id": 0,
                "a": 1,
                "b": 2,
                "c": 3
            }
        }
        update_op = lambda: {
            "op": "u",
            "o": {
                "$set": {
                    "a": 4,
                    "b": 5
                },
                "$unset": {
                    "c": True
                }
            },
            "o2": {
                "_id": 1
            }
        }

        def filter_doc(document, fields):
            if fields and '_id' in fields:
                fields.remove('_id')
            return self.opman.filter_oplog_entry(
                document, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['c'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['a', 'b', 'c'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['b'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['a', 'b', 'c'])
        self.assertEqual(filtered, None)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['d', 'e', 'f'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_filter_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {
            "op": "i",
            "o": {
                "_id": 0,
                "a": 1,
                "b": 2,
                "c": 3
            }
        }
        update_op = lambda: {
            "op": "u",
            "o": {
                "$set": {
                    "a": 4,
                    "b": 5
                },
                "$unset": {
                    "c": True
                }
            },
            "o2": {
                "_id": 1
            }
        }

        def filter_doc(document, fields):
            if fields and '_id' not in fields:
                fields.append('_id')
            return self.opman.filter_oplog_entry(
                document, include_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['a', 'b'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['d', 'e', 'f'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['a', 'c'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['d', 'e', 'f'])
        self.assertEqual(filtered, None)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['a', 'b', 'c'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_nested_fields(self):
        def check_nested(document, fields, filtered_document, op='i'):
            if '_id' not in fields:
                fields.append('_id')
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, include_fields=fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'name': 'Han Solo', 'a': {'b': {}}}
        fields = ['name', 'a.b.c']
        filtered_document = {'name': 'Han Solo'}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a', '-a.-b']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
                    '_id': 1}

        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        fields = ['a', '-a.-b']
        check_nested(document, fields, filtered_document)

        document = {'test': 1}
        fields = ['doesnt_exist']
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        fields = ['a.b', 'b.a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        fields = ['a.b']
        filtered_document = {'a': {'b': {'a': {'b': 1}}}}
        check_nested(document, fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        fields = ['name']
        filtered_document = {'name': 'anna'}
        check_nested(document, fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        fields = ['a', 'c']
        filtered_update = {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'c': 3}, 'a.e': 1}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        fields = ['a.b']
        filtered_update = {'$set': {'a.b.1': 1, 'a.b.2': 2}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        fields = ['a.b.e']
        filtered_update = None
        check_nested(update, fields, filtered_update, op='u')

    def test_nested_exclude_fields(self):
        def check_nested(document, exclude_fields, filtered_document, op='i'):
            if '_id' in exclude_fields:
                exclude_fields.remove('_id')
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, exclude_fields=exclude_fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'a': {'b': {'c': {'d': 0, 'e': 1}}}}
        exclude_fields = ['a.b.c.d']
        filtered_document = {'a': {'b': {'c': {'e': 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}}
        exclude_fields = ['a.b.c.d.e.f']
        filtered_document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': 1}
        exclude_fields = ['a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2,
                             'c': {'g': 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        exclude_fields = ['a.b.c', 'a.e', '_id']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2, 'c': {'g': 1},
                             '_id': 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a', '-a.-b']
        filtered_document = {'-a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'_id': 1, 'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['test.doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        exclude_fields = ['a.b', 'b.a']
        filtered_document = {'a': {}, 'b': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        exclude_fields = ['a.b']
        filtered_document = {'a': {}, 'c': {'a': {'b': 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        exclude_fields = ['name']
        filtered_document = {'name_of_cat': 'pushkin'}
        check_nested(document, exclude_fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        exclude_fields = ['a', 'c']
        filtered_update = {'$set': {'b': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'d': 1}, 'a.f': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b.c', 'a.b.d', 'a.e']
        filtered_update = {'$set': {'a.b': {}}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        exclude_fields = ['a.b']
        filtered_update = {'$set': {'b': 3}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}}
        exclude_fields = ['a.b', 'd']
        filtered_update = {'$set': {'g': 456}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b', 'a.e']
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op='u')
Esempio n. 13
0
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """
    def setUp(self):
        self.repl_set = ReplicaSetSingle().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']
        self.dest_mapping_stru = DestMapping([], [], {})
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(), ),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
        )

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            pass  # OplogThread may not have been started
        self.primary_conn.drop_database("test")
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # timestamp is None - all oplog entries excluding no-ops are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(
            cursor.count(), self.primary_conn["local"]["oplog.rs"].find({
                'op': {
                    '$ne': 'n'
                }
            }).count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)['o']['_id']
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert_many([{
            "i": i
        } for i in range(2, 1002)])
        oplog_cursor = self.oplog_coll.find(
            {
                'op': {
                    '$ne': 'n'
                },
                'ns': {
                    '$not': re.compile(r'\.(system|\$cmd)')
                }
            },
            sort=[("ts", pymongo.ASCENDING)])

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 1 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 1 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({"i": i + 500})
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        dest_mapping_stru = DestMapping(["test.test"], [], {})
        opman = OplogThread(primary_client=conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=dest_mapping_stru,
                            ns_set=set(["test.test"]))
        # Insert a document into a ns_set collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-ns_set collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many([{
                "test": "1" * 1024
            } for _ in range(1024)])
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()

    def test_skipped_oplog_entry_updates_checkpoint(self):
        repl_set = ReplicaSetSingle().start()
        conn = repl_set.client()
        opman = OplogThread(primary_client=conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=DestMapping(["test.test"], [],
                                                          {}),
                            ns_set=set(["test.test"]))
        opman.start()

        # Insert a document into a ns_set collection
        conn["test"]["test"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        assert_soon(
            lambda: last_ts == opman.checkpoint,
            "OplogThread never updated checkpoint to non-skipped "
            "entry.")
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)

        # Make sure that the oplog thread updates its checkpoint on every
        # oplog entry.
        conn["test"]["ignored"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        assert_soon(lambda: last_ts == opman.checkpoint,
                    "OplogThread never updated checkpoint to skipped entry.")
        opman.join()
        conn.close()
        repl_set.stop()

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['test']['test'].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        docs.sort(key=lambda doc: doc['a'])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_dump_collection_cancel(self):
        """Test that dump_collection returns None when cancelled."""
        self.primary_conn["test"]["test"].insert_one({"test": "1"})

        # Pretend that the OplogThead was cancelled
        self.opman.running = False
        self.assertIsNone(self.opman.dump_collection())

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertTrue(cursor_empty)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["test"]["test"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"i": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        for doc in cursor:
            last_doc = doc
        self.assertEqual(last_doc['o']['i'], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists, no collection dump, something in oplog
        collection.insert_many([{"i": i + 500} for i in range(1000)])
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertEqual(cursor, None)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {
            "test.test1": "test.test1_dest",
            "test.test2": "test.test2_dest"
        }
        dest_mapping_stru = DestMapping(source_ns, [], dest_mapping)
        self.opman.dest_mapping = dest_mapping
        self.opman.dest_mapping_stru = dest_mapping_stru
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {
                    "weakness": "kryptonite"
                }})

            def update_complete():
                docs = docman._search()
                for d in docs:
                    if d.get("weakness") == "kryptonite":
                        return True
                    return False

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {
                    "weakness": "kryptonite"
                }})
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert_one({
            "name": "kermit",
            "color": "green"
        })
        self.primary_conn["test"]["test"].insert_one({
            "name": "elmo",
            "color": "firetruck red"
        })

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets")

        self.primary_conn["test"]["test"].delete_one({"name": "elmo"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets")
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_upgrade_oplog_progress(self):
        first_oplog_ts = self.opman.oplog.find_one()['ts']
        # Old format oplog progress file:
        progress = {str(self.opman.oplog): bson_ts_to_long(first_oplog_ts)}
        # Set up oplog managers to use the old format.
        oplog_progress = LockingDict()
        oplog_progress.dict = progress
        self.opman.oplog_progress = oplog_progress
        # Cause the oplog managers to update their checkpoints.
        self.opman.update_checkpoint(first_oplog_ts)
        # New format should be in place now.
        new_format = {self.opman.replset_name: first_oplog_ts}
        self.assertEqual(new_format, self.opman.oplog_progress.get_dict())
class TestFilterFields(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        self.dest_mapping_stru = DestMapping([], [], {})
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(DocManager(), ),
                                 oplog_progress_dict=LockingDict(),
                                 dest_mapping_stru=self.dest_mapping_stru)

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def _check_fields(self, opman, fields, exclude_fields, projection):
        if fields:
            self.assertEqual(sorted(opman.fields), sorted(fields))
            self.assertEqual(opman._fields, set(fields))
        else:
            self.assertEqual(opman.fields, None)
            self.assertEqual(opman._fields, set([]))
        if exclude_fields:
            self.assertEqual(sorted(opman.exclude_fields),
                             sorted(exclude_fields))
            self.assertEqual(opman._exclude_fields, set(exclude_fields))
        else:
            self.assertEqual(opman.exclude_fields, None)
            self.assertEqual(opman._exclude_fields, set([]))

        self.assertEqual(opman._projection, projection)

    def test_filter_fields(self):
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        db = conn['test']['test']
        db.insert_one(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_filter_exclude_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {
            "op": "i",
            "o": {
                "_id": 0,
                "a": 1,
                "b": 2,
                "c": 3
            }
        }
        update_op = lambda: {
            "op": "u",
            "o": {
                "$set": {
                    "a": 4,
                    "b": 5
                },
                "$unset": {
                    "c": True
                }
            },
            "o2": {
                "_id": 1
            }
        }

        # Case 0: insert op, no fields provided
        self.opman.exclude_fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert op, fields provided
        self.opman.exclude_fields = ['c']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.exclude_fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0})
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 3: update op, no fields provided
        self.opman.exclude_fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update op, fields provided
        self.opman.exclude_fields = ['b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'b': 0}, self.opman._projection)

        # Case 5: update op, fields provided, empty $set
        self.opman.exclude_fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'a': 0, 'b': 0}, self.opman._projection)

        # Case 6: update op, fields provided, empty $unset
        self.opman.exclude_fields = ['c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 7: update op, fields provided, entry is nullified
        self.opman.exclude_fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, None)
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 8: update op, fields provided, replacement
        self.opman.exclude_fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry({
            'op': 'u',
            'o': {
                'a': 1,
                'b': 2,
                'c': 3,
                'd': 4
            }
        })
        self.assertEqual(filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'d': 0, 'e': 0, 'f': 0}, self.opman._projection)

    def test_filter_oplog_entry(self):
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {
            "op": "i",
            "o": {
                "_id": 0,
                "a": 1,
                "b": 2,
                "c": 3
            }
        }
        update_op = lambda: {
            "op": "u",
            "o": {
                "$set": {
                    "a": 4,
                    "b": 5
                },
                "$unset": {
                    "c": True
                }
            },
            "o2": {
                "_id": 1
            }
        }

        # Case 0: insert op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert op, fields provided
        self.opman.fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection)

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered['o'], {'_id': 0})
        self.assertEqual({
            '_id': 1,
            'd': 1,
            'e': 1,
            'f': 1
        }, self.opman._projection)

        # Case 3: update op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update op, fields provided
        self.opman.fields = ['a', 'c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'_id': 1, 'a': 1, 'c': 1}, self.opman._projection)

        # Case 5: update op, fields provided, empty $set
        self.opman.fields = ['c']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])
        self.assertEqual({'_id': 1, 'c': 1}, self.opman._projection)

        # Case 6: update op, fields provided, empty $unset
        self.opman.fields = ['a', 'b']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, self.opman._projection)

        # Case 7: update op, fields provided, entry is nullified
        self.opman.fields = ['d', 'e', 'f']
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, None)
        self.assertEqual({
            '_id': 1,
            'd': 1,
            'e': 1,
            'f': 1
        }, self.opman._projection)

        # Case 8: update op, fields provided, replacement
        self.opman.fields = ['a', 'b', 'c']
        filtered = self.opman.filter_oplog_entry({
            'op': 'u',
            'o': {
                'a': 1,
                'b': 2,
                'c': 3,
                'd': 4
            }
        })
        self.assertEqual(filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({
            '_id': 1,
            'a': 1,
            'b': 1,
            'c': 1
        }, self.opman._projection)

    def test_exclude_fields_constructor(self):
        # Test with the "_id" field in exclude_fields
        exclude_fields = ["_id", "title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = extra_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

        # Test with nothing set for exclude_fields
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=None)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_fields_constructor(self):
        # Test with "_id" field in constructor
        fields = ["_id", "title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test without "_id" field in constructor
        fields = ["title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        fields.append('_id')
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_exclude_fields_attr(self):
        # Test with the "_id" field in exclude_fields.
        exclude_fields = ["_id", "title", "content", "author"]
        exclude_fields.remove('_id')
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = extra_fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

        # Test with nothing set for exclude_fields
        self.opman.exclude_fields = None
        self._check_fields(self.opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_fields_attr(self):
        # Test with "_id" field included in fields
        fields = ["_id", "title", "content", "author"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [], dict(
            (f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test without "_id" field included in fields
        fields = ["title", "content", "author"]
        self.opman.fields = fields
        fields.append('_id')
        self._check_fields(self.opman, fields, [], dict(
            (f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [], dict(
            (f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        self.opman.fields = None
        self._check_fields(self.opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = self.opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

    def test_nested_fields(self):
        def check_nested(document, fields, filtered_document, op='i'):
            self.opman.fields = fields
            fields.append('_id')
            self.assertEqual(set(fields), self.opman._fields)
            self.assertEqual(sorted(fields), sorted(self.opman.fields))
            filtered_result = self.opman.filter_oplog_entry({
                'op': op,
                'o': document
            })
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'name': 'Han Solo', 'a': {'b': {}}}
        fields = ['name', 'a.b.c']
        filtered_document = {'name': 'Han Solo'}
        check_nested(document, fields, filtered_document)

        document = {
            'a': {
                'b': {
                    'c': 2,
                    'e': 3
                },
                'e': 5
            },
            'b': 2,
            'c': {
                'g': 1
            }
        }
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}}
        check_nested(document, fields, filtered_document)

        document = {
            'a': {
                'b': {
                    'c': 2,
                    'e': 3
                },
                'e': 5
            },
            'b': 2,
            'c': {
                'g': 1
            },
            '_id': 1
        }
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a', '-a.-b']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        document = {
            'a': {
                'b': {
                    'c': {
                        'd': 1
                    }
                }
            },
            '-a': {
                '-b': {
                    '-c': 2
                }
            },
            '_id': 1
        }

        fields = ['a.b', '-a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        fields = ['a', '-a.-b']
        check_nested(document, fields, filtered_document)

        document = {'test': 1}
        fields = ['doesnt_exist']
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        fields = ['a.b', 'b.a']
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        fields = ['a.b']
        filtered_document = {'a': {'b': {'a': {'b': 1}}}}
        check_nested(document, fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        fields = ['name']
        filtered_document = {'name': 'anna'}
        check_nested(document, fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        fields = ['a', 'c']
        filtered_update = {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'c': 3}, 'a.e': 1}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        fields = ['a.b']
        filtered_update = {'$set': {'a.b.1': 1, 'a.b.2': 2}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        fields = ['a.b.e']
        filtered_update = None
        check_nested(update, fields, filtered_update, op='u')

    def test_nested_exclude_fields(self):
        def check_nested(document, exclude_fields, filtered_document, op='i'):
            self.opman.exclude_fields = exclude_fields
            if '_id' in exclude_fields:
                exclude_fields.remove('_id')
            self.assertEqual(set(exclude_fields), self.opman._exclude_fields)
            self.assertEqual(sorted(exclude_fields),
                             sorted(self.opman.exclude_fields))
            filtered_result = self.opman.filter_oplog_entry({
                'op': op,
                'o': document
            })
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'a': {'b': {'c': {'d': 0, 'e': 1}}}}
        exclude_fields = ['a.b.c.d']
        filtered_document = {'a': {'b': {'c': {'e': 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}}
        exclude_fields = ['a.b.c.d.e.f']
        filtered_document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': 1}
        exclude_fields = ['a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {
            'a': {
                'b': {
                    'c': 2,
                    'e': 3
                },
                'e': 5
            },
            'b': 2,
            'c': {
                'g': 1
            }
        }
        exclude_fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {
            'a': {
                'b': {
                    'c': 2,
                    'e': 3
                },
                'e': 5
            },
            'b': 2,
            'c': {
                'g': 1
            },
            '_id': 1
        }
        exclude_fields = ['a.b.c', 'a.e', '_id']
        filtered_document = {
            'a': {
                'b': {
                    'e': 3
                }
            },
            'b': 2,
            'c': {
                'g': 1
            },
            '_id': 1
        }
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a', '-a.-b']
        filtered_document = {'-a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {
            'a': {
                'b': {
                    'c': {
                        'd': 1
                    }
                }
            },
            '-a': {
                '-b': {
                    '-c': 2
                }
            },
            '_id': 1
        }
        exclude_fields = ['a.b', '-a']
        filtered_document = {'_id': 1, 'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['test.doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        exclude_fields = ['a.b', 'b.a']
        filtered_document = {'a': {}, 'b': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        exclude_fields = ['a.b']
        filtered_document = {'a': {}, 'c': {'a': {'b': 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        exclude_fields = ['name']
        filtered_document = {'name_of_cat': 'pushkin'}
        check_nested(document, exclude_fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        exclude_fields = ['a', 'c']
        filtered_update = {'$set': {'b': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'d': 1}, 'a.f': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b.c', 'a.b.d', 'a.e']
        filtered_update = {'$set': {'a.b': {}}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        exclude_fields = ['a.b']
        filtered_update = {'$set': {'b': 3}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}}
        exclude_fields = ['a.b', 'd']
        filtered_update = {'$set': {'g': 456}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b', 'a.e']
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op='u')

    def test_fields_and_exclude(self):
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        # Test setting both to None in constructor
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=None,
                            exclude_fields=None)
        self._check_fields(opman, [], [], None)
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=None,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        # Test setting fields when exclude_fields is set
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", None)
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=None,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", exclude_fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", None)
        self.assertRaises(errors.InvalidConfiguration,
                          OplogThread,
                          self.primary_conn, (DocManager(), ),
                          LockingDict(),
                          self.dest_mapping_stru,
                          fields=fields,
                          exclude_fields=exclude_fields)
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """
    def setUp(self):
        self.repl_set = ReplicaSetSingle().start()
        self.primary_conn = self.repl_set.client()
        self.oplog_coll = self.primary_conn.local['oplog.rs']

    def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
        if include_ns is None:
            include_ns = []
        if exclude_ns is None:
            exclude_ns = []
        if dest_mapping is None:
            dest_mapping = {}

        # include_ns must not exist together with exclude_ns
        # dest_mapping must exist together with include_ns
        # those checks have been tested in test_config.py so we skip that here.

        self.dest_mapping_stru = DestMapping(include_ns, exclude_ns,
                                             dest_mapping)
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(DocManager(), ),
                                 oplog_progress_dict=LockingDict(),
                                 dest_mapping_stru=self.dest_mapping_stru,
                                 ns_set=include_ns,
                                 ex_ns_set=exclude_ns)

    def init_dbs(self):
        # includedb1.* & includedb2.includecol1 are interested collections
        self.primary_conn["includedb1"]["includecol1"].insert_many([{
            "idb1col1":
            i
        } for i in range(1, 3)])

        self.primary_conn["includedb1"]["includecol2"].insert_many([{
            "idb1col2":
            i
        } for i in range(1, 3)])

        self.primary_conn["includedb2"]["includecol1"].insert_many([{
            "idb2col1":
            i
        } for i in range(1, 3)])

        # the others are not interested collections
        self.primary_conn["includedb2"]["excludecol2"].insert_many([{
            "idb2col2":
            i
        } for i in range(1, 3)])

        self.primary_conn["excludedb3"]["excludecol1"].insert_many([{
            "idb3col1":
            i
        } for i in range(1, 3)])

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            pass  # OplogThread may not have been started

        for db in self.primary_conn.database_names():
            if db != "local":
                self.primary_conn.drop_database(db)
        close_client(self.primary_conn)
        self.repl_set.stop()

    def test_get_oplog_cursor(self):
        '''Test the get_oplog_cursor method'''

        # Put something in the dbs
        self.init_dbs()

        # timestamp is None - all oplog entries excluding no-ops are returned.
        # wildcard include case no impact the result
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

        got_cursor = self.opman.get_oplog_cursor(None)
        oplog_cursor = self.oplog_coll.find({'op': {'$ne': 'n'}})
        self.assertNotEqual(got_cursor, None)
        self.assertEqual(got_cursor.count(), oplog_cursor.count())

        # wildcard exclude case no impact the result
        self.reset_opman([], ["includedb2.excludecol2", "excludedb3.*"], {})

        got_cursor = self.opman.get_oplog_cursor(None)
        oplog_cursor = self.oplog_coll.find({'op': {'$ne': 'n'}})
        self.assertNotEqual(got_cursor, None)
        self.assertEqual(got_cursor.count(), oplog_cursor.count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "idb1col1": 1}
        self.primary_conn["includedb1"]["includecol1"].insert_one(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)['o']['_id']
        retrieved = self.primary_conn.includedb1.includecol1.find_one(
            next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["includedb1"]["includecol1"].insert_many([{
            "idb1col1":
            i
        } for i in range(2, 1002)])
        oplog_cursor = self.oplog_coll.find(
            {
                'op': {
                    '$ne': 'n'
                },
                'ns': {
                    '$not': re.compile(r'\.(system|\$cmd)')
                }
            },
            sort=[("ts", pymongo.ASCENDING)])

        # initial insert + 1000 more inserts
        self.assertEqual(oplog_cursor.count(), 11 + 1000)
        pivot = oplog_cursor.skip(400).limit(-1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 11 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # empty oplog case has been tested in test_oplog_manager.py,
        # skip that here.

        # Put something in the dbs
        self.init_dbs()

        # Test non-empty oplog
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["includedb1"]["includecol1"].insert_one(
                {"idb1col1": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find({
            'op': {
                '$ne': 'n'
            }
        }).sort("$natural", pymongo.DESCENDING).limit(-1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. no namespace set is set
        2. include namespace set is set
        3. exclude namespace set is set

        empty oplog case has been tested in test_oplog_manager.py,
        skip that here.
        """

        # Put something in the dbs
        self.init_dbs()

        # no namespace set is set
        self.reset_opman([], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 10)

        # include namespace set is set
        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 6)

        # exclude namespace set is set
        self.reset_opman([], ["includedb2.excludecol2", "excludedb3.*"], {})
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 6)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})
        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['includedb1']['includecol3'].insert_many(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        docs = list(filter(lambda doc: 'a' in doc, docs))
        docs.sort(key=lambda doc: doc['a'])

        self.assertEqual(len(docs), 90)
        expected_a = itertools.chain(range(0, 50), range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["includedb1"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertTrue(cursor_empty)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["includedb1"]["includecol1"]
        collection.insert_one({"idb1col1": 1})
        collection.delete_one({"idb1col1": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"idb1col1": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        for doc in cursor:
            last_doc = doc
        self.assertEqual(last_doc['o']['idb1col1'], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists
        collection.insert_many([{"idb1col1": i + 500} for i in range(1000)])
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertEqual(cursor, None)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns_wildcard = ["includedb1.*", "includedb2.includecol1"]
        source_ns = [
            "includedb1.includecol1", "includedb1.includecol2",
            "includedb2.includecol1"
        ]
        phony_ns = ["includedb2.excludecol2", "excludedb3.excludecol1"]
        dest_mapping = {
            "includedb1.*": "newdb1_*.bar",
            "includedb2.includecol1": "newdb2.newcol1"
        }
        self.reset_opman(source_ns_wildcard, [], dest_mapping)
        docman = self.opman.doc_managers[0]
        dest_mapping_stru = self.opman.dest_mapping_stru

        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {
                    "weakness": "kryptonite"
                }})

            def update_complete():
                docs = docman._search()
                for d in docs:
                    if d.get("weakness") == "kryptonite":
                        return True
                    return False

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"],
                             dest_mapping_stru.get(ns))
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].delete_one({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [
                d for d in docman._search()
                if d["ns"] == dest_mapping_stru.get(ns)
            ]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].delete_many({})
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert_one(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update_one(
                {"_id": 1}, {"$set": {
                    "weakness": "kryptonite"
                }})
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """

        self.reset_opman(["includedb1.*"], [], {})
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["includedb1"]["includecol1"].insert_one({
            "name":
            "kermit",
            "color":
            "green"
        })
        self.primary_conn["includedb1"]["includecol2"].insert_one({
            "name":
            "elmo",
            "color":
            "firetruck red"
        })
        self.primary_conn["excludedb2"]["excludecol1"].insert_one({
            "name":
            "panda",
            "color":
            "white and black"
        })

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets")

        self.primary_conn["includedb1"]["includecol2"].delete_one(
            {"name": "elmo"})

        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets")
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")
Esempio n. 16
0
class TestOplogManagerSharded(unittest.TestCase):
    """Defines all test cases for OplogThreads running on a sharded
    cluster
    """
    def setUp(self):
        """ Initialize the cluster:

        Clean out the databases used by the tests
        Make connections to mongos, mongods
        Create and shard test collections
        Create OplogThreads
        """
        self.cluster = ShardedCluster().start()

        # Connection to mongos
        self.mongos_conn = self.cluster.client()

        # Connections to the shards
        self.shard1_conn = self.cluster.shards[0].client()
        self.shard2_conn = self.cluster.shards[1].client()
        self.shard1_secondary_conn = self.cluster.shards[0].secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)
        self.shard2_secondary_conn = self.cluster.shards[1].secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        self.mongos_conn.admin.command(
            bson.SON([("split", "test.mcsharded"), ("middle", {
                "i": 1000
            })]))

        # disable the balancer
        self.mongos_conn.config.settings.update({"_id": "balancer"},
                                                {"$set": {
                                                    "stopped": True
                                                }},
                                                upsert=True)

        # Move chunks to their proper places
        try:
            self.mongos_conn["admin"].command("moveChunk",
                                              "test.mcsharded",
                                              find={"i": 1},
                                              to='demo-set-0')
        except pymongo.errors.OperationFailure:
            pass
        try:
            self.mongos_conn["admin"].command("moveChunk",
                                              "test.mcsharded",
                                              find={"i": 1000},
                                              to='demo-set-1')
        except pymongo.errors.OperationFailure:
            pass

        # Make sure chunks are distributed correctly
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000

        assert_soon(chunks_moved,
                    max_tries=120,
                    message='chunks not moved? doc1=%r, doc2=%r' %
                    (self.shard1_conn.test.mcsharded.find_one(),
                     self.shard2_conn.test.mcsharded.find_one()))
        self.mongos_conn.test.mcsharded.remove()

        # create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Oplog threads (oplog manager) for each shard
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(doc_manager, ),
            oplog_progress_dict=oplog_progress,
            ns_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(doc_manager, ),
            oplog_progress_dict=oplog_progress,
            ns_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)

    def tearDown(self):
        try:
            self.opman1.join()
        except RuntimeError:
            pass  # thread may not have been started
        try:
            self.opman2.join()
        except RuntimeError:
            pass  # thread may not have been started
        self.mongos_conn.close()
        self.shard1_conn.close()
        self.shard2_conn.close()
        self.shard1_secondary_conn.close()
        self.shard2_secondary_conn.close()
        self.cluster.stop()

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # timestamp = None
        cursor1 = self.opman1.get_oplog_cursor(None)
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find({
            'op': {
                '$ne': 'n'
            },
            'ns': {
                '$not': re.compile(r'\.system')
            }
        })
        self.assertEqual(list(cursor1), list(oplog1))

        cursor2 = self.opman2.get_oplog_cursor(None)
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find({
            'op': {
                '$ne': 'n'
            },
            'ns': {
                '$not': re.compile(r'\.system')
            }
        })
        self.assertEqual(list(cursor2), list(oplog2))

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.mongos_conn["test"]["mcsharded"].insert(doc)
        latest_timestamp = self.opman1.get_last_oplog_timestamp()
        cursor = self.opman1.get_oplog_cursor(latest_timestamp)
        self.assertNotEqual(cursor, None)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = cursor[0]['o']['_id']
        retrieved = self.mongos_conn.test.mcsharded.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        for i in range(2, 2002):
            self.mongos_conn["test"]["mcsharded"].insert({"i": i})
        oplog1 = self.shard1_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)])
        oplog2 = self.shard2_conn["local"]["oplog.rs"].find(
            sort=[("ts", pymongo.ASCENDING)])

        # oplogs should have records for inserts performed, plus
        # various other messages
        oplog1_count = oplog1.count()
        oplog2_count = oplog2.count()
        self.assertGreaterEqual(oplog1_count, 998)
        self.assertGreaterEqual(oplog2_count, 1002)
        pivot1 = oplog1.skip(400).limit(1)[0]
        pivot2 = oplog2.skip(400).limit(1)[0]

        cursor1 = self.opman1.get_oplog_cursor(pivot1["ts"])
        cursor2 = self.opman2.get_oplog_cursor(pivot2["ts"])
        self.assertEqual(cursor1.count(), oplog1_count - 400)
        self.assertEqual(cursor2.count(), oplog2_count - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(), None)
        self.assertEqual(self.opman2.get_last_oplog_timestamp(), None)

        # Test non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({"i": i + 500})
        oplog1 = self.shard1_conn["local"]["oplog.rs"]
        oplog1 = oplog1.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        oplog2 = self.shard2_conn["local"]["oplog.rs"]
        oplog2 = oplog2.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman1.get_last_oplog_timestamp(), oplog1["ts"])
        self.assertEqual(self.opman2.get_last_oplog_timestamp(), oplog2["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        last_ts1 = self.opman1.dump_collection()
        last_ts2 = self.opman2.dump_collection()
        self.assertEqual(last_ts1, None)
        self.assertEqual(last_ts2, None)

        # Test with non-empty oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.mongos_conn["test"]["mcsharded"].insert({"i": i + 500})
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(last_ts1, self.opman1.dump_collection())
        self.assertEqual(last_ts2, self.opman2.dump_collection())
        self.assertEqual(len(self.opman1.doc_managers[0]._search()), 1000)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman1.init_cursor()[0]))
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman2.init_cursor()[0]))
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = self.opman2.collection_dump = True

        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, None)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, no collection dump, stuff in oplog
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.collection_dump = self.opman2.collection_dump = False
        collection.insert({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.opman1.init_cursor()
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        cursor, cursor_len = self.opman2.init_cursor()
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 1200)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)], last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry1 = list(self.shard1_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=2))
        entry2 = list(self.shard2_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1, cursor_len1 = self.opman1.init_cursor()
        cursor2, cursor_len2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
        progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman1.checkpoint)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman2.checkpoint)

    def test_rollback(self):
        """Test the rollback method in a sharded environment

        Cases:
        1. Documents on both shards, rollback on one shard
        2. Documents on both shards, rollback on both shards

        """

        self.opman1.start()
        self.opman2.start()

        # Insert first documents while primaries are up
        db_main = self.mongos_conn["test"]["mcsharded"]
        db_main.insert({"i": 0}, w=2)
        db_main.insert({"i": 1000}, w=2)
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].count(), 1)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].count(), 1)

        # Case 1: only one primary goes down, shard1 in this case
        self.cluster.shards[0].primary.stop(destroy=False)

        # Wait for the secondary to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document. This will be rolled back later
        retry_until_ok(db_main.insert, {"i": 1})
        db_secondary1 = self.shard1_secondary_conn["test"]["mcsharded"]
        db_secondary2 = self.shard2_secondary_conn["test"]["mcsharded"]
        self.assertEqual(db_secondary1.count(), 2)

        # Wait for replication on the doc manager
        # Note that both OplogThreads share the same doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 3
        assert_soon(c,
                    "not all writes were replicated to doc manager",
                    max_tries=120)

        # Kill the new primary
        self.cluster.shards[0].secondary.stop(destroy=False)

        # Start both servers back up
        self.cluster.shards[0].primary.start()
        primary_admin = self.shard1_conn["admin"]
        c = lambda: primary_admin.command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[0].secondary.start()
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        query = {"i": {"$lt": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)

        # Only first document should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)

        def check_docman_rollback():
            docman_docs = [
                d for d in self.opman1.doc_managers[0]._search()
                if d["i"] < 1000
            ]
            return len(docman_docs) == 1 and docman_docs[0]["i"] == 0

        assert_soon(check_docman_rollback, "doc manager did not roll back")

        # Wait for previous rollback to complete.
        # Insert/delete one document to jump-start replication to secondaries
        # in MongoDB 3.x.
        db_main.insert({'i': -1})
        db_main.remove({'i': -1})

        def rollback_done():
            secondary1_count = retry_until_ok(db_secondary1.count)
            secondary2_count = retry_until_ok(db_secondary2.count)
            return (1, 1) == (secondary1_count, secondary2_count)

        assert_soon(rollback_done,
                    "rollback never replicated to one or more secondaries")

        ##############################

        # Case 2: Primaries on both shards go down
        self.cluster.shards[0].primary.stop(destroy=False)
        self.cluster.shards[1].primary.stop(destroy=False)

        # Wait for the secondaries to be promoted
        shard1_secondary_admin = self.shard1_secondary_conn["admin"]
        shard2_secondary_admin = self.shard2_secondary_conn["admin"]
        assert_soon(
            lambda: shard1_secondary_admin.command("isMaster")["ismaster"])
        assert_soon(
            lambda: shard2_secondary_admin.command("isMaster")["ismaster"])

        # Insert another document on each shard. These will be rolled back later
        retry_until_ok(db_main.insert, {"i": 1})
        self.assertEqual(db_secondary1.count(), 2)
        retry_until_ok(db_main.insert, {"i": 1001})
        self.assertEqual(db_secondary2.count(), 2)

        # Wait for replication on the doc manager
        c = lambda: len(self.opman1.doc_managers[0]._search()) == 4
        assert_soon(c, "not all writes were replicated to doc manager")

        # Kill the new primaries
        self.cluster.shards[0].secondary.stop(destroy=False)
        self.cluster.shards[1].secondary.stop(destroy=False)

        # Start the servers back up...
        # Shard 1
        self.cluster.shards[0].primary.start()
        c = lambda: self.shard1_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[0].secondary.start()
        secondary_admin = self.shard1_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)
        # Shard 2
        self.cluster.shards[1].primary.start()
        c = lambda: self.shard2_conn['admin'].command("isMaster")["ismaster"]
        assert_soon(lambda: retry_until_ok(c))
        self.cluster.shards[1].secondary.start()
        secondary_admin = self.shard2_secondary_conn["admin"]
        c = lambda: secondary_admin.command("replSetGetStatus")["myState"] == 2
        assert_soon(c)

        # Wait for the shards to come online
        assert_soon(lambda: retry_until_ok(db_main.find(query).count) > 0)
        query2 = {"i": {"$gte": 1000}}
        assert_soon(lambda: retry_until_ok(db_main.find(query2).count) > 0)

        # Only first documents should exist in MongoDB
        self.assertEqual(db_main.find(query).count(), 1)
        self.assertEqual(db_main.find_one(query)["i"], 0)
        self.assertEqual(db_main.find(query2).count(), 1)
        self.assertEqual(db_main.find_one(query2)["i"], 1000)

        # Same should hold for the doc manager
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 2)
        i_values = [d["i"] for d in self.opman1.doc_managers[0]._search()]
        self.assertIn(0, i_values)
        self.assertIn(1000, i_values)

    def test_with_chunk_migration(self):
        """Test that DocManagers have proper state after both a successful
        and an unsuccessful chunk migration
        """

        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        for i in range(1000):
            collection.insert({"i": i + 500})
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Test successful chunk move from shard 1 to shard 2
        self.mongos_conn["admin"].command("moveChunk",
                                          "test.mcsharded",
                                          find={"i": 1},
                                          to="demo-set-1")

        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

        # Mark the collection as "dropped". This will cause migration to fail.
        self.mongos_conn["config"]["collections"].update(
            {"_id": "test.mcsharded"}, {"$set": {
                "dropped": True
            }})

        # Test unsuccessful chunk move from shard 2 to shard 1
        def fail_to_move_chunk():
            self.mongos_conn["admin"].command("moveChunk",
                                              "test.mcsharded",
                                              find={"i": 1},
                                              to="demo-set-0")

        self.assertRaises(pymongo.errors.OperationFailure, fail_to_move_chunk)
        # doc manager should still have all docs
        all_docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(all_docs), 1000)
        for i, doc in enumerate(sorted(all_docs, key=lambda x: x["i"])):
            self.assertEqual(doc["i"], i + 500)

    def test_with_orphan_documents(self):
        """Test that DocManagers have proper state after a chunk migration
        that resuts in orphaned documents.
        """
        # Start replicating to dummy doc managers
        self.opman1.start()
        self.opman2.start()

        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": i + 500} for i in range(1000))
        # Assert current state of the mongoverse
        self.assertEqual(self.shard1_conn["test"]["mcsharded"].find().count(),
                         500)
        self.assertEqual(self.shard2_conn["test"]["mcsharded"].find().count(),
                         500)
        assert_soon(lambda: len(self.opman1.doc_managers[0]._search()) == 1000)

        # Stop replication using the 'rsSyncApplyStop' failpoint
        self.shard1_conn.admin.command("configureFailPoint",
                                       "rsSyncApplyStop",
                                       mode="alwaysOn")

        # Move a chunk from shard2 to shard1
        def move_chunk():
            try:
                self.mongos_conn["admin"].command("moveChunk",
                                                  "test.mcsharded",
                                                  find={"i": 1000},
                                                  to="demo-set-0")
            except pymongo.errors.OperationFailure:
                pass

        # moveChunk will never complete, so use another thread to continue
        mover = threading.Thread(target=move_chunk)
        mover.start()

        # wait for documents to start moving to shard 1
        assert_soon(lambda: self.shard1_conn.test.mcsharded.count() > 500)

        # Get opid for moveChunk command
        operations = self.mongos_conn.test.current_op()
        opid = None
        for op in operations["inprog"]:
            if op.get("query", {}).get("moveChunk"):
                opid = op["opid"]
        self.assertNotEqual(opid, None, "could not find moveChunk operation")
        # Kill moveChunk with the opid
        self.mongos_conn["test"]["$cmd.sys.killop"].find_one({"op": opid})

        # Mongo Connector should not become confused by unsuccessful chunk move
        docs = self.opman1.doc_managers[0]._search()
        self.assertEqual(len(docs), 1000)
        self.assertEqual(sorted(d["i"] for d in docs), list(range(500, 1500)))

        # cleanup
        mover.join()