Esempio n. 1
0
    def test_exclude_fields_constructor(self):
        """Check how the OplogThread constructor handles ``exclude_fields``.

        Every case builds a fresh OplogThread, verifies the stored field
        settings and projection through ``_check_fields``, and then passes an
        insert entry through ``filter_oplog_entry`` to see which keys of the
        document survive.
        """
        # Test with the "_id" field in exclude_fields
        exclude_fields = ["_id", "title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        # The checks below expect "_id" to be absent from the stored
        # exclusions, so drop it from the expectation as well.
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op': 'i',
            'o': dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        # Build the document from this case's exclude_fields rather than
        # from the extra_fields left over from the previous case.
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op': 'i',
            'o': dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        # Expected to look exactly like "no exclusions at all".
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op': 'i',
            'o': dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

        # Test with nothing set for exclude_fields
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=None)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op': 'i',
            'o': dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
Esempio n. 2
0
    def test_fields_constructor(self):
        """Exercise the ``fields`` argument of the OplogThread constructor.

        For each variant a new OplogThread is created, its field settings and
        projection are verified with ``_check_fields``, and an insert entry
        holding two additional keys is filtered to confirm which keys remain.
        """
        extras = ['extra1', 'extra2']

        # Variant 1: "_id" is listed explicitly
        fields = ["_id", "title", "content", "author"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(), ),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        entry = {'op': 'i', 'o': dict.fromkeys(fields + extras, 1)}
        kept = opman.filter_oplog_entry(entry)['o']
        self.assertEqual(dict.fromkeys(fields, 1), kept)

        # Variant 2: "_id" is left out of the constructor argument
        fields = ["title", "content", "author"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(), ),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=fields)
        # The expectation includes "_id" even though it was not passed in.
        fields.append('_id')
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        entry = {'op': 'i', 'o': dict.fromkeys(fields + extras, 1)}
        kept = opman.filter_oplog_entry(entry)['o']
        self.assertEqual(dict.fromkeys(fields, 1), kept)

        # Variant 3: "_id" is the only field
        fields = ["_id"]
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(), ),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
            fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        entry = {'op': 'i', 'o': dict.fromkeys(fields + extras, 1)}
        kept = opman.filter_oplog_entry(entry)['o']
        self.assertEqual({'_id': 1}, kept)

        # Variant 4: no ``fields`` argument at all
        opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(), ),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru)
        self._check_fields(opman, [], [], None)
        all_keys = ['_id'] + extras
        entry = {'op': 'i', 'o': dict.fromkeys(all_keys, 1)}
        kept = opman.filter_oplog_entry(entry)['o']
        # Nothing is filtered, so every key of the document must come back.
        self.assertEqual(dict.fromkeys(all_keys, 1), kept)
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
    cluster
    """

    def setUp(self):
        """Start the "test-oplog-manager" replica set and build the
        OplogThread under test on top of a client connected to it.
        """
        _, _, self.primary_p = start_replica_set("test-oplog-manager")
        self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
        self.oplog_coll = self.primary_conn.local["oplog.rs"]
        self.opman = OplogThread(
            primary_conn=self.primary_conn,
            main_address="%s:%d" % (mongo_host, self.primary_p),
            oplog_coll=self.oplog_coll,
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=None,
            auth_key=None,
            auth_username=None,
            repl_set="test-oplog-manager",
        )

    def tearDown(self):
        """Join the OplogThread, close the client and kill the replica set."""
        try:
            self.opman.join()
        except RuntimeError:
            pass  # OplogThread may not have been started
        self.primary_conn.close()
        kill_replica_set("test-oplog-manager")

    def test_get_oplog_cursor(self):
        """Test the get_oplog_cursor method"""

        # timestamp is None - all oplog entries are returned.
        cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(cursor.count(), self.primary_conn["local"]["oplog.rs"].count())

        # earliest entry is the only one at/after timestamp
        doc = {"ts": bson.Timestamp(1000, 0), "i": 1}
        self.primary_conn["test"]["test"].insert(doc)
        latest_timestamp = self.opman.get_last_oplog_timestamp()
        cursor = self.opman.get_oplog_cursor(latest_timestamp)
        self.assertIsNotNone(cursor)
        self.assertEqual(cursor.count(), 1)
        next_entry_id = next(cursor)["o"]["_id"]
        retrieved = self.primary_conn.test.test.find_one(next_entry_id)
        self.assertEqual(retrieved, doc)

        # many entries before and after timestamp
        self.primary_conn["test"]["test"].insert({"i": i} for i in range(2, 1002))
        oplog_cursor = self.oplog_coll.find(sort=[("ts", pymongo.ASCENDING)])

        # startup + insert + 1000 inserts
        self.assertEqual(oplog_cursor.count(), 2 + 1000)
        pivot = oplog_cursor.skip(400).limit(1)[0]

        goc_cursor = self.opman.get_oplog_cursor(pivot["ts"])
        self.assertEqual(goc_cursor.count(), 2 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Test the get_last_oplog_timestamp method"""

        # "empty" the oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertIsNone(self.opman.get_last_oplog_timestamp())

        # Test non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        oplog = self.primary_conn["local"]["oplog.rs"]
        oplog = oplog.find().sort("$natural", pymongo.DESCENDING).limit(1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), oplog["ts"])

    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertIsNone(last_ts)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert({"i": i + 500})
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{"a": i} for i in range(100)]
        # Ten of the hundred documents are flagged with "_upsert_exception";
        # the assertions below expect exactly those ten to be missing.
        for i in range(50, 60):
            docs[i]["_upsert_exception"] = True
        self.primary_conn["test"]["test"].insert(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        # Dicts cannot be ordered on Python 3, so sort on the field that the
        # loop below compares.
        docs.sort(key=lambda d: d["a"])

        self.assertEqual(len(docs), 90)
        # range objects cannot be concatenated on Python 3; build lists first.
        expected_a = list(range(0, 50)) + list(range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc["a"], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc["op"] == "n" for doc in self.opman.init_cursor()[0]))
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_len = self.opman.init_cursor()
        self.assertIsNone(cursor)
        self.assertEqual(cursor_len, 0)
        self.assertIsNone(self.opman.checkpoint)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        collection = self.primary_conn["test"]["test"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        # Skip everything but the final entry, which must be the insert above.
        for _ in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], entry[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertIsNone(cursor)
        self.assertIsNotNone(self.opman.checkpoint)

    def test_filter_fields(self):
        """Dump a collection with ``fields`` set and check which keys of the
        source document reach the doc manager.
        """
        docman = self.opman.doc_managers[0]
        conn = self.opman.main_connection

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.opman.fields = include_fields
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        db = conn["test"]["test"]
        db.insert(doc)
        assert_soon(lambda: db.count() == 1)
        self.opman.dump_collection()

        result = docman._search()[0]
        keys = result.keys()
        for inc, exc in zip(include_fields, exclude_fields):
            self.assertIn(inc, keys)
            self.assertNotIn(exc, keys)

    def test_namespace_mapping(self):
        """Test mapping of namespaces
        Cases:

        upsert/delete/update of documents:
        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {"test.test1": "test.test1_dest", "test.test2": "test.test2_dest"}
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]
        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        def update_complete():
            # True once any replicated document carries the updated field.
            return any(d.get("weakness") == "kryptonite" for d in docman._search())

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update({"_id": 1}, {"$set": {"weakness": "kryptonite"}})

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].remove({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].remove()
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update({"_id": 1}, {"$set": {"weakness": "kryptonite"}})
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Test that one OplogThread is capable of replicating to more than
        one target.
        """
        doc_managers = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = doc_managers

        # start replicating
        self.opman.start()
        self.primary_conn["test"]["test"].insert({"name": "kermit", "color": "green"})
        self.primary_conn["test"]["test"].insert({"name": "elmo", "color": "firetruck red"})

        # 2 documents x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 6,
            "OplogThread should be able to replicate to multiple targets",
        )

        self.primary_conn["test"]["test"].remove({"name": "elmo"})

        # 1 remaining document x 3 doc managers
        assert_soon(
            lambda: sum(len(d._search()) for d in doc_managers) == 3,
            "OplogThread should be able to replicate to multiple targets",
        )
        for d in doc_managers:
            self.assertEqual(d._search()[0]["name"], "kermit")

    def test_filter_oplog_entry(self):
        """Run insert and update entries through filter_oplog_entry for
        several settings of ``fields``.
        """
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        insert_op = lambda: {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}
        update_op = lambda: {"op": "u", "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}}, "o2": {"_id": 1}}

        # Case 0: insert op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        self.opman.fields = ["a", "b"]
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        self.opman.fields = ["d", "e", "f"]
        filtered = self.opman.filter_oplog_entry(insert_op())
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        self.opman.fields = None
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        self.opman.fields = ["a", "c"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        self.opman.fields = ["c"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        self.opman.fields = ["a", "b"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        self.opman.fields = ["d", "e", "f"]
        filtered = self.opman.filter_oplog_entry(update_op())
        self.assertIsNone(filtered)
Esempio n. 4
0
class TestFilterFields(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Create the fixtures shared by every test in this class.

        Stores the object returned by ``ReplicaSetSingle().start()``, a client
        obtained from it, and that client's ``local['oplog.rs']`` collection
        as class attributes.
        """
        replica_set = ReplicaSetSingle().start()
        cls.repl_set = replica_set
        client = replica_set.client()
        cls.primary_conn = client
        cls.oplog_coll = client.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        """Drop the "test" database, close the shared client, then stop the
        replica set stored on the class.
        """
        client = cls.primary_conn
        client.drop_database("test")
        close_client(client)
        cls.repl_set.stop()

    def setUp(self):
        """Give each test its own DestMapping and OplogThread.

        The OplogThread is built on the class-level client with a single
        DocManager and an empty LockingDict for oplog progress.
        """
        mapping = DestMapping([], [], {})
        self.dest_mapping_stru = mapping
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(), ),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=mapping,
        )

    def tearDown(self):
        """Join the OplogThread created in ``setUp``, tolerating the case
        where the test never started it.
        """
        oplog_thread = self.opman
        try:
            oplog_thread.join()
        except RuntimeError:
            # Raised when the OplogThread was never started; nothing to wait
            # for in that case.
            return

    def _check_fields(self, opman, fields, exclude_fields, projection):
        if fields:
            self.assertEqual(sorted(opman.fields), sorted(fields))
            self.assertEqual(opman._fields, set(fields))
        else:
            self.assertEqual(opman.fields, None)
            self.assertEqual(opman._fields, set([]))
        if exclude_fields:
            self.assertEqual(sorted(opman.exclude_fields),
                             sorted(exclude_fields))
            self.assertEqual(opman._exclude_fields, set(exclude_fields))
        else:
            self.assertEqual(opman.exclude_fields, None)
            self.assertEqual(opman._exclude_fields, set([]))

        self.assertEqual(opman._projection, projection)

    def test_filter_fields(self):
        """Dump a collection while ``fields`` is set and verify that the
        dumped document has the included keys and lacks the others.
        """
        doc_manager = self.opman.doc_managers[0]
        client = self.opman.primary_client

        wanted = ["a", "b", "c"]
        unwanted = ["d", "e", "f"]

        # Only the wanted fields should be replicated
        self.opman.fields = wanted
        # The source document also carries the unwanted fields
        collection = client['test']['test']
        collection.insert_one(
            {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1})
        assert_soon(lambda: collection.count() == 1)
        self.opman.dump_collection()

        dumped_keys = doc_manager._search()[0].keys()
        # Both lists have the same length; check them pairwise.
        for position, kept in enumerate(wanted):
            self.assertIn(kept, dumped_keys)
            self.assertNotIn(unwanted[position], dumped_keys)

    def test_filter_exclude_oplog_entry(self):
        """Filter insert and update entries under various ``exclude_fields``.

        After every assignment to ``exclude_fields`` both the filtered entry
        and the resulting ``_projection`` are checked.
        """
        # filter_oplog_entry works on the entry in place, so each case asks
        # one of these factories for a fresh copy.
        def new_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def new_update():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        opman = self.opman

        # Case 0: insert, nothing excluded -> entry unchanged
        opman.exclude_fields = None
        result = opman.filter_oplog_entry(new_insert())
        self.assertEqual(result, new_insert())
        self.assertEqual(None, opman._projection)

        # Case 1: insert, one field excluded
        opman.exclude_fields = ['c']
        result = opman.filter_oplog_entry(new_insert())
        self.assertEqual(result['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'c': 0}, opman._projection)

        # Case 2: insert, every non-_id field excluded -> only _id is left
        opman.exclude_fields = ['a', 'b', 'c']
        result = opman.filter_oplog_entry(new_insert())
        self.assertEqual(result['o'], {'_id': 0})
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, opman._projection)

        # Case 3: update, nothing excluded -> entry unchanged
        opman.exclude_fields = None
        result = opman.filter_oplog_entry(new_update())
        self.assertEqual(result, new_update())
        self.assertEqual(None, opman._projection)

        # Case 4: update, one $set field excluded
        opman.exclude_fields = ['b']
        result = opman.filter_oplog_entry(new_update())
        self.assertNotIn('b', result['o']['$set'])
        self.assertIn('a', result['o']['$set'])
        self.assertEqual(result['o']['$unset'], new_update()['o']['$unset'])
        self.assertEqual({'b': 0}, opman._projection)

        # Case 5: update, all $set fields excluded -> $set disappears
        opman.exclude_fields = ['a', 'b']
        result = opman.filter_oplog_entry(new_update())
        self.assertNotIn('$set', result['o'])
        self.assertEqual(result['o']['$unset'], new_update()['o']['$unset'])
        self.assertEqual({'a': 0, 'b': 0}, opman._projection)

        # Case 6: update, the only $unset field excluded -> $unset disappears
        opman.exclude_fields = ['c']
        result = opman.filter_oplog_entry(new_update())
        self.assertNotIn('$unset', result['o'])
        self.assertEqual(result['o']['$set'], new_update()['o']['$set'])
        self.assertEqual({'c': 0}, opman._projection)

        # Case 7: update, everything excluded -> the whole entry is dropped
        opman.exclude_fields = ['a', 'b', 'c']
        result = opman.filter_oplog_entry(new_update())
        self.assertEqual(result, None)
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, opman._projection)

        # Case 8: update given as a full replacement document
        opman.exclude_fields = ['d', 'e', 'f']
        replacement = {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}}
        result = opman.filter_oplog_entry(replacement)
        self.assertEqual(result, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'d': 0, 'e': 0, 'f': 0}, opman._projection)

    def test_filter_oplog_entry(self):
        """Filter insert and update entries under various ``fields`` settings.

        After every assignment to ``fields`` both the filtered entry and the
        resulting ``_projection`` are checked.
        """
        # filter_oplog_entry works on the entry in place, so each case asks
        # one of these factories for a fresh copy.
        def new_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def new_update():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        opman = self.opman

        # Case 0: insert, no fields configured -> entry unchanged
        opman.fields = None
        result = opman.filter_oplog_entry(new_insert())
        self.assertEqual(result, new_insert())
        self.assertEqual(None, opman._projection)

        # Case 1: insert, subset of fields kept
        opman.fields = ['a', 'b']
        result = opman.filter_oplog_entry(new_insert())
        self.assertEqual(result['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, opman._projection)

        # Case 2: insert, none of the fields present -> only _id is left
        opman.fields = ['d', 'e', 'f']
        result = opman.filter_oplog_entry(new_insert())
        self.assertEqual(result['o'], {'_id': 0})
        self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1},
                         opman._projection)

        # Case 3: update, no fields configured -> entry unchanged
        opman.fields = None
        result = opman.filter_oplog_entry(new_update())
        self.assertEqual(result, new_update())
        self.assertEqual(None, opman._projection)

        # Case 4: update, one $set field filtered out
        opman.fields = ['a', 'c']
        result = opman.filter_oplog_entry(new_update())
        self.assertNotIn('b', result['o']['$set'])
        self.assertIn('a', result['o']['$set'])
        self.assertEqual(result['o']['$unset'], new_update()['o']['$unset'])
        self.assertEqual({'_id': 1, 'a': 1, 'c': 1}, opman._projection)

        # Case 5: update, no $set field kept -> $set disappears
        opman.fields = ['c']
        result = opman.filter_oplog_entry(new_update())
        self.assertNotIn('$set', result['o'])
        self.assertEqual(result['o']['$unset'], new_update()['o']['$unset'])
        self.assertEqual({'_id': 1, 'c': 1}, opman._projection)

        # Case 6: update, no $unset field kept -> $unset disappears
        opman.fields = ['a', 'b']
        result = opman.filter_oplog_entry(new_update())
        self.assertNotIn('$unset', result['o'])
        self.assertEqual(result['o']['$set'], new_update()['o']['$set'])
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1}, opman._projection)

        # Case 7: update, nothing kept -> the whole entry is dropped
        opman.fields = ['d', 'e', 'f']
        result = opman.filter_oplog_entry(new_update())
        self.assertEqual(result, None)
        self.assertEqual({'_id': 1, 'd': 1, 'e': 1, 'f': 1},
                         opman._projection)

        # Case 8: update given as a full replacement document
        opman.fields = ['a', 'b', 'c']
        replacement = {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}}
        result = opman.filter_oplog_entry(replacement)
        self.assertEqual(result, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'_id': 1, 'a': 1, 'b': 1, 'c': 1},
                         opman._projection)

    def test_exclude_fields_constructor(self):
        """Check ``exclude_fields`` handling when passed to the constructor.

        Four configurations are covered: a list that contains ``_id``, a
        list without ``_id``, a list holding only ``_id`` and ``None``.
        Each one is verified through ``_check_fields`` and by filtering an
        insert entry that carries two additional fields.
        """
        extras = ['extra1', 'extra2']

        def make_opman(exclude_fields):
            # The scenarios differ only in the exclude_fields argument.
            return OplogThread(primary_client=self.primary_conn,
                               doc_managers=(DocManager(), ),
                               oplog_progress_dict=LockingDict(),
                               dest_mapping_stru=self.dest_mapping_stru,
                               exclude_fields=exclude_fields)

        def filter_insert(opman, names):
            # Filter an insert entry whose document maps every name to 1.
            entry = {'op': 'i', 'o': dict.fromkeys(names, 1)}
            return opman.filter_oplog_entry(entry)['o']

        # Test with the "_id" field in exclude_fields
        exclude_fields = ["_id", "title", "content", "author"]
        opman = make_opman(exclude_fields)
        # The thread is expected to have dropped "_id" by itself, so the
        # local list is adjusted only after construction.
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        filtered = filter_insert(opman, exclude_fields + extras)
        self.assertEqual(dict.fromkeys(extras, 1), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = make_opman(exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Build the document from this scenario's own list rather than
        # from a variable left over from the previous scenario.
        filtered = filter_insert(opman, exclude_fields + extras)
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = make_opman(exclude_fields)
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + extras
        filtered = filter_insert(opman, extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

        # Test with nothing set for exclude_fields
        opman = make_opman(None)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id'] + extras
        filtered = filter_insert(opman, extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

    def test_fields_constructor(self):
        """Check how the ``fields`` constructor argument shapes filtering.

        Covers a list with ``_id``, a list without it, a list holding only
        ``_id`` and a thread built without any ``fields`` argument.
        """
        extras = ['extra1', 'extra2']

        def build(**field_options):
            # Shared constructor arguments; only the field options vary.
            return OplogThread(primary_client=self.primary_conn,
                               doc_managers=(DocManager(), ),
                               oplog_progress_dict=LockingDict(),
                               dest_mapping_stru=self.dest_mapping_stru,
                               **field_options)

        def filtered_insert(thread, names):
            # Run an insert entry with every name set to 1 through the
            # filter and hand back the resulting document.
            entry = {'op': 'i', 'o': dict.fromkeys(names, 1)}
            return thread.filter_oplog_entry(entry)['o']

        # "_id" listed explicitly
        wanted = ["_id", "title", "content", "author"]
        thread = build(fields=wanted)
        self._check_fields(thread, wanted, [], dict.fromkeys(wanted, 1))
        result = filtered_insert(thread, wanted + extras)
        self.assertEqual(dict.fromkeys(wanted, 1), result)

        # "_id" left out: it is appended locally only after construction
        wanted = ["title", "content", "author"]
        thread = build(fields=wanted)
        wanted.append('_id')
        self._check_fields(thread, wanted, [], dict.fromkeys(wanted, 1))
        result = filtered_insert(thread, wanted + extras)
        self.assertEqual(dict.fromkeys(wanted, 1), result)

        # Only "_id"
        wanted = ["_id"]
        thread = build(fields=wanted)
        self._check_fields(thread, wanted, [], dict.fromkeys(wanted, 1))
        result = filtered_insert(thread, wanted + extras)
        self.assertEqual({'_id': 1}, result)

        # No fields argument at all
        thread = build()
        self._check_fields(thread, [], [], None)
        everything = ['_id'] + extras
        result = filtered_insert(thread, everything)
        self.assertEqual(dict.fromkeys(everything, 1), result)

    def test_exclude_fields_attr(self):
        """Check ``exclude_fields`` handling when set as an attribute.

        Mirrors the constructor scenarios: a list that contains ``_id``, a
        list without ``_id``, a list holding only ``_id`` and ``None``.
        Each one is verified through ``_check_fields`` and by filtering an
        insert entry that carries two additional fields.
        """
        extras = ['extra1', 'extra2']

        def filter_insert(names):
            # Filter an insert entry whose document maps every name to 1.
            entry = {'op': 'i', 'o': dict.fromkeys(names, 1)}
            return self.opman.filter_oplog_entry(entry)['o']

        # Test with the "_id" field in exclude_fields.
        exclude_fields = ["_id", "title", "content", "author"]
        # Assign while "_id" is still in the list so the setter really
        # sees it; the local list is trimmed afterwards to build the
        # expected state.
        self.opman.exclude_fields = exclude_fields
        exclude_fields.remove('_id')
        self._check_fields(self.opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        filtered = filter_insert(exclude_fields + extras)
        self.assertEqual(dict.fromkeys(extras, 1), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Build the document from this scenario's own list rather than
        # from a variable left over from the previous scenario.
        filtered = filter_insert(exclude_fields + extras)
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], [], None)
        extra_fields = exclude_fields + extras
        filtered = filter_insert(extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

        # Test with nothing set for exclude_fields
        self.opman.exclude_fields = None
        self._check_fields(self.opman, [], [], None)
        extra_fields = ['_id'] + extras
        filtered = filter_insert(extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

    def test_fields_attr(self):
        """Check how assigning ``opman.fields`` shapes filtering.

        Covers a list with ``_id``, a list without it, a list holding only
        ``_id`` and ``None``.
        """
        extras = ['extra1', 'extra2']

        def filtered_insert(names):
            # Run an insert entry with every name set to 1 through the
            # filter and hand back the resulting document.
            entry = {'op': 'i', 'o': dict.fromkeys(names, 1)}
            return self.opman.filter_oplog_entry(entry)['o']

        # "_id" listed explicitly
        wanted = ["_id", "title", "content", "author"]
        self.opman.fields = wanted
        self._check_fields(self.opman, wanted, [],
                           dict.fromkeys(wanted, 1))
        result = filtered_insert(wanted + extras)
        self.assertEqual(dict.fromkeys(wanted, 1), result)

        # "_id" left out: it is appended locally only after assignment
        wanted = ["title", "content", "author"]
        self.opman.fields = wanted
        wanted.append('_id')
        self._check_fields(self.opman, wanted, [],
                           dict.fromkeys(wanted, 1))
        result = filtered_insert(wanted + extras)
        self.assertEqual(dict.fromkeys(wanted, 1), result)

        # Only "_id"
        wanted = ["_id"]
        self.opman.fields = wanted
        self._check_fields(self.opman, wanted, [],
                           dict.fromkeys(wanted, 1))
        result = filtered_insert(wanted + extras)
        self.assertEqual({'_id': 1}, result)

        # Fields reset to None
        self.opman.fields = None
        self._check_fields(self.opman, [], [], None)
        everything = ['_id'] + extras
        result = filtered_insert(everything)
        self.assertEqual(dict.fromkeys(everything, 1), result)

    def test_nested_fields(self):
        """Exercise dotted (nested) paths assigned to ``opman.fields``."""

        def verify(doc, wanted, expected):
            # Assign first, then add "_id" to the local list: the
            # assertions expect the thread to report "_id" as well.
            self.opman.fields = wanted
            wanted.append('_id')
            self.assertEqual(set(wanted), self.opman._fields)
            self.assertEqual(sorted(wanted), sorted(self.opman.fields))
            entry = {'op': 'i', 'o': doc}
            outcome = self.opman.filter_oplog_entry(entry)['o']
            self.assertEqual(outcome, expected)

        def branching_doc():
            # Fresh copy each time so no case sees another one's object.
            return {
                'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                'b': 2,
                'c': {'g': 1},
            }

        def dashed_doc():
            # Fresh document with ordinary and dash-prefixed keys.
            return {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}

        # Path pointing below an empty sub-document
        verify({'name': 'Han Solo', 'a': {'b': {}}},
               ['name', 'a.b.c'],
               {'name': 'Han Solo'})

        # Two nested paths under the same parent
        verify(branching_doc(),
               ['a.b.c', 'a.e'],
               {'a': {'b': {'c': 2}, 'e': 5}})

        # Same again with an "_id" present in the document
        doc = branching_doc()
        doc['_id'] = 1
        verify(doc,
               ['a.b.c', 'a.e'],
               {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1})

        # Dash-prefixed keys
        doc = dashed_doc()
        verify(doc, ['a.b', '-a'], dict(doc))

        doc = dashed_doc()
        verify(doc, ['a', '-a.-b'], dict(doc))

        # Dash-prefixed keys with "_id"; both checks share one document
        # and one expected copy taken before the first check.
        doc = dashed_doc()
        doc['_id'] = 1
        expected = dict(doc)
        verify(doc, ['a.b', '-a'], expected)
        verify(doc, ['a', '-a.-b'], expected)

        # Field that is absent from the document
        verify({'test': 1}, ['doesnt_exist'], {})

        # Mirrored paths
        doc = {'a': {'b': 1}, 'b': {'a': 1}}
        verify(doc, ['a.b', 'b.a'], dict(doc))

        # Same path fragment repeated deeper and under another root
        verify({'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
               ['a.b'],
               {'a': {'b': {'a': {'b': 1}}}})

        # Field name that is a prefix of another field name
        verify({'name': 'anna', 'name_of_cat': 'pushkin'},
               ['name'],
               {'name': 'anna'})

    def test_nested_exclude_fields(self):
        """Exercise dotted paths assigned to ``opman.exclude_fields``."""

        def verify(doc, unwanted, expected):
            # Assign first, then drop "_id" from the local list: the
            # assertions expect the thread not to report "_id".
            self.opman.exclude_fields = unwanted
            try:
                unwanted.remove('_id')
            except ValueError:
                pass
            self.assertEqual(set(unwanted), self.opman._exclude_fields)
            self.assertEqual(sorted(unwanted),
                             sorted(self.opman.exclude_fields))
            entry = {'op': 'i', 'o': doc}
            outcome = self.opman.filter_oplog_entry(entry)['o']
            self.assertEqual(outcome, expected)

        def branching_doc():
            # Fresh copy each time so no case sees another one's object.
            return {
                'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                'b': 2,
                'c': {'g': 1},
            }

        def dashed_doc():
            # Fresh document with ordinary and dash-prefixed keys.
            return {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}

        # Deep leaf removed, sibling kept
        verify({'a': {'b': {'c': {'d': 0, 'e': 1}}}},
               ['a.b.c.d'],
               {'a': {'b': {'c': {'e': 1}}}})

        # Removing the only leaf of a deep chain
        verify({'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}},
               ['a.b.c.d.e.f'],
               {'a': {'b': {'c': {'-a': 0}}}})

        # Single top-level field
        verify({'a': 1}, ['a'], {})

        # Two nested paths under the same parent
        verify(branching_doc(),
               ['a.b.c', 'a.e'],
               {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}})

        # Same again with "_id" both in the document and in the list
        doc = branching_doc()
        doc['_id'] = 1
        verify(doc,
               ['a.b.c', 'a.e', '_id'],
               {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}, '_id': 1})

        # Dash-prefixed keys
        verify(dashed_doc(), ['a.b', '-a'], {})
        verify(dashed_doc(), ['a', '-a.-b'], {})

        # Dash-prefixed keys with "_id" in the document
        doc = dashed_doc()
        doc['_id'] = 1
        verify(doc, ['a.b', '-a'], {'_id': 1})

        # Paths that are absent from the document
        doc = {'test': 1}
        verify(doc, ['doesnt_exist'], dict(doc))

        doc = {'test': 1}
        verify(doc, ['test.doesnt_exist'], dict(doc))

        # Mirrored paths
        verify({'a': {'b': 1}, 'b': {'a': 1}}, ['a.b', 'b.a'], {})

        # Same path fragment repeated deeper and under another root
        verify({'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
               ['a.b'],
               {'c': {'a': {'b': 1}}})

        # Field name that is a prefix of another field name
        verify({'name': 'anna', 'name_of_cat': 'pushkin'},
               ['name'],
               {'name_of_cat': 'pushkin'})

    def test_fields_and_exclude(self):
        """Check that ``fields`` and ``exclude_fields`` cannot be combined.

        Builds threads with neither, one or the other option, asserts the
        resulting state through ``_check_fields`` and expects
        ``errors.InvalidConfiguration`` when the opposite option is
        assigned afterwards or when both are given to the constructor.
        """
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        def build(**field_options):
            # Shared constructor arguments; only the field options vary.
            return OplogThread(primary_client=self.primary_conn,
                               doc_managers=(DocManager(), ),
                               oplog_progress_dict=LockingDict(),
                               dest_mapping_stru=self.dest_mapping_stru,
                               **field_options)

        # Both options explicitly None
        opman = build(fields=None, exclude_fields=None)
        self._check_fields(opman, [], [], None)

        # Only exclude_fields given
        opman = build(fields=None, exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Assigning fields afterwards is rejected, even with None
        for value in (fields, None):
            with self.assertRaises(errors.InvalidConfiguration):
                opman.fields = value

        # Only fields given
        opman = build(exclude_fields=None, fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        # Assigning exclude_fields afterwards is rejected, even with None
        for value in (exclude_fields, None):
            with self.assertRaises(errors.InvalidConfiguration):
                opman.exclude_fields = value

        # Both options given to the constructor at once
        self.assertRaises(errors.InvalidConfiguration,
                          OplogThread,
                          self.primary_conn, (DocManager(), ),
                          LockingDict(),
                          self.dest_mapping_stru,
                          fields=fields,
                          exclude_fields=exclude_fields)
Esempio n. 5
0
class TestFilterFields(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Start a single-node replica set and keep handles on the class."""
        cls.repl_set = replica_set = ReplicaSetSingle().start()
        cls.primary_conn = client = replica_set.client()
        # Collection handle for the "oplog.rs" collection of "local".
        cls.oplog_coll = client.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        """Drop the ``test`` database, close the client, stop the set."""
        client = cls.primary_conn
        client.drop_database("test")
        close_client(client)
        cls.repl_set.stop()

    def setUp(self):
        """Create a fresh namespace config and an ``OplogThread`` using it."""
        config = NamespaceConfig()
        self.namespace_config = config
        thread_options = dict(primary_client=self.primary_conn,
                              doc_managers=(DocManager(), ),
                              oplog_progress_dict=LockingDict(),
                              namespace_config=config)
        self.opman = OplogThread(**thread_options)

    def tearDown(self):
        """Join the oplog thread, ignoring ``RuntimeError`` from ``join``."""
        worker = self.opman
        try:
            worker.join()
        except RuntimeError:
            # The thread may never have been started by the test.
            return

    def reset_include_fields(self, fields):
        """Replace the thread's namespace config with one including ``fields``."""
        replacement = NamespaceConfig(include_fields=fields)
        self.opman.namespace_config = replacement

    def reset_exclude_fields(self, fields):
        """Replace the thread's namespace config with one excluding ``fields``."""
        replacement = NamespaceConfig(exclude_fields=fields)
        self.opman.namespace_config = replacement

    def test_filter_fields(self):
        """Dump a collection with include fields set and inspect the keys."""
        doc_manager = self.opman.doc_managers[0]
        client = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Only the include fields should be cared about
        self.reset_include_fields(include_fields)

        # The stored document carries more than just those fields
        collection = client['test']['test']
        collection.insert_one(
            {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1})
        assert_soon(lambda: collection.count() == 1)
        self.opman.dump_collection()

        stored_keys = doc_manager._search()[0].keys()
        for kept, dropped in zip(include_fields, exclude_fields):
            self.assertIn(kept, stored_keys)
            self.assertNotIn(dropped, stored_keys)

    def test_filter_exclude_oplog_entry(self):
        """Filter insert and update entries using ``exclude_fields``."""

        # The sample entries are factories because filter_oplog_entry
        # modifies the entry it is given in-place.
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def run_filter(entry, excluded):
            # "_id" is taken out of a non-empty list before filtering.
            has_id = bool(excluded) and '_id' in excluded
            if has_id:
                excluded.remove('_id')
            return self.opman.filter_oplog_entry(entry,
                                                 exclude_fields=excluded)

        # Case 0: insert op, no fields provided
        result = run_filter(insert_op(), None)
        self.assertEqual(result, insert_op())

        # Case 1: insert op, fields provided
        result = run_filter(insert_op(), ['c'])
        self.assertEqual(result['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(insert_op(), ['a', 'b', 'c'])
        self.assertEqual(result['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        result = run_filter(update_op(), None)
        self.assertEqual(result, update_op())

        # Case 4: update op, fields provided
        result = run_filter(update_op(), ['b'])
        set_part = result['o']['$set']
        self.assertNotIn('b', set_part)
        self.assertIn('a', set_part)
        self.assertEqual(result['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        result = run_filter(update_op(), ['a', 'b'])
        self.assertNotIn('$set', result['o'])
        self.assertEqual(result['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(update_op(), ['c'])
        self.assertNotIn('$unset', result['o'])
        self.assertEqual(result['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        result = run_filter(update_op(), ['a', 'b', 'c'])
        self.assertEqual(result, None)

        # Case 8: update op, fields provided, replacement
        replacement = {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}}
        result = run_filter(replacement, ['d', 'e', 'f'])
        self.assertEqual(result, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_filter_oplog_entry(self):
        """Filter insert and update entries using ``include_fields``."""

        # The sample entries are factories because filter_oplog_entry
        # modifies the entry it is given in-place.
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def run_filter(entry, included):
            # "_id" is added to a non-empty list before filtering.
            lacks_id = bool(included) and '_id' not in included
            if lacks_id:
                included.append('_id')
            return self.opman.filter_oplog_entry(entry,
                                                 include_fields=included)

        # Case 0: insert op, no fields provided
        result = run_filter(insert_op(), None)
        self.assertEqual(result, insert_op())

        # Case 1: insert op, fields provided
        result = run_filter(insert_op(), ['a', 'b'])
        self.assertEqual(result['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(insert_op(), ['d', 'e', 'f'])
        self.assertEqual(result['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        result = run_filter(update_op(), None)
        self.assertEqual(result, update_op())

        # Case 4: update op, fields provided
        result = run_filter(update_op(), ['a', 'c'])
        set_part = result['o']['$set']
        self.assertNotIn('b', set_part)
        self.assertIn('a', set_part)
        self.assertEqual(result['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        result = run_filter(update_op(), ['c'])
        self.assertNotIn('$set', result['o'])
        self.assertEqual(result['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(update_op(), ['a', 'b'])
        self.assertNotIn('$unset', result['o'])
        self.assertEqual(result['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        result = run_filter(update_op(), ['d', 'e', 'f'])
        self.assertEqual(result, None)

        # Case 8: update op, fields provided, replacement
        replacement = {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}}
        result = run_filter(replacement, ['a', 'b', 'c'])
        self.assertEqual(result, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_nested_fields(self):
        """Exercise dotted paths passed as ``include_fields``.

        Insert documents are checked first, followed by ``$set`` updates
        that use dotted keys.
        """

        def verify(doc, wanted, expected, op='i'):
            # "_id" is added to the include list when missing.
            if '_id' not in wanted:
                wanted.append('_id')
            entry = {'op': op, 'o': doc}
            outcome = self.opman.filter_oplog_entry(entry,
                                                    include_fields=wanted)
            # A None result is compared as-is; otherwise only the "o"
            # part of the entry is compared.
            if outcome is not None:
                outcome = outcome['o']
            self.assertEqual(outcome, expected)

        def branching_doc():
            # Fresh copy each time so no case sees another one's object.
            return {
                'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                'b': 2,
                'c': {'g': 1},
            }

        def dashed_doc():
            # Fresh document with ordinary and dash-prefixed keys.
            return {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}

        # Path pointing below an empty sub-document
        verify({'name': 'Han Solo', 'a': {'b': {}}},
               ['name', 'a.b.c'],
               {'name': 'Han Solo'})

        # Two nested paths under the same parent
        verify(branching_doc(),
               ['a.b.c', 'a.e'],
               {'a': {'b': {'c': 2}, 'e': 5}})

        # Same again with an "_id" present in the document
        doc = branching_doc()
        doc['_id'] = 1
        verify(doc,
               ['a.b.c', 'a.e'],
               {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1})

        # Dash-prefixed keys
        doc = dashed_doc()
        verify(doc, ['a.b', '-a'], dict(doc))

        doc = dashed_doc()
        verify(doc, ['a', '-a.-b'], dict(doc))

        # Dash-prefixed keys with "_id"; both checks share one document
        # and one expected copy taken before the first check.
        doc = dashed_doc()
        doc['_id'] = 1
        expected = dict(doc)
        verify(doc, ['a.b', '-a'], expected)
        verify(doc, ['a', '-a.-b'], expected)

        # Field that is absent from the document
        verify({'test': 1}, ['doesnt_exist'], {})

        # Mirrored paths
        doc = {'a': {'b': 1}, 'b': {'a': 1}}
        verify(doc, ['a.b', 'b.a'], dict(doc))

        # Same path fragment repeated deeper and under another root
        verify({'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
               ['a.b'],
               {'a': {'b': {'a': {'b': 1}}}})

        # Field name that is a prefix of another field name
        verify({'name': 'anna', 'name_of_cat': 'pushkin'},
               ['name'],
               {'name': 'anna'})

        # Update: dotted $set keys below included top-level fields
        verify({'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}},
               ['a', 'c'],
               {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}},
               op='u')

        # Update: included paths reach inside a $set value
        verify({'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}},
               ['a.b.c', 'a.e'],
               {'$set': {'a.b': {'c': 3}, 'a.e': 1}},
               op='u')

        # Update: $set keys extend below an included path
        verify({'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}},
               ['a.b'],
               {'$set': {'a.b.1': 1, 'a.b.2': 2}},
               op='u')

        # Update: nothing matches, the whole entry is expected to be None
        verify({'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
               ['a.b.e'],
               None,
               op='u')

    def test_nested_exclude_fields(self):
        """Check that dotted ``exclude_fields`` paths prune nested keys.

        Every case pushes one oplog entry through ``filter_oplog_entry`` and
        compares the filtered ``'o'`` payload (or ``None`` when the filter
        returns ``None``) with the expected value.
        """
        def run_case(payload, excluded, expected, op='i'):
            # '_id' is removed from the exclusion list before filtering.
            if '_id' in excluded:
                excluded.remove('_id')
            entry = {'op': op, 'o': payload}
            outcome = self.opman.filter_oplog_entry(
                entry, exclude_fields=excluded)
            if outcome is not None:
                outcome = outcome['o']
            self.assertEqual(outcome, expected)

        # (document, exclude_fields, expected filtered document)
        insert_cases = [
            (
                {'a': {'b': {'c': {'d': 0, 'e': 1}}}},
                ['a.b.c.d'],
                {'a': {'b': {'c': {'e': 1}}}},
            ),
            (
                {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}},
                ['a.b.c.d.e.f'],
                {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}},
            ),
            (
                {'a': 1},
                ['a'],
                {},
            ),
            (
                {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                 'b': 2,
                 'c': {'g': 1}},
                ['a.b.c', 'a.e'],
                {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}},
            ),
            (
                {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                 'b': 2,
                 'c': {'g': 1},
                 '_id': 1},
                ['a.b.c', 'a.e', '_id'],
                {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}, '_id': 1},
            ),
            (
                {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}},
                ['a.b', '-a'],
                {'a': {}},
            ),
            (
                {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}},
                ['a', '-a.-b'],
                {'-a': {}},
            ),
            (
                {'a': {'b': {'c': {'d': 1}}},
                 '-a': {'-b': {'-c': 2}},
                 '_id': 1},
                ['a.b', '-a'],
                {'_id': 1, 'a': {}},
            ),
            (
                {'test': 1},
                ['doesnt_exist'],
                {'test': 1},
            ),
            (
                {'test': 1},
                ['test.doesnt_exist'],
                {'test': 1},
            ),
            (
                {'a': {'b': 1}, 'b': {'a': 1}},
                ['a.b', 'b.a'],
                {'a': {}, 'b': {}},
            ),
            (
                {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
                ['a.b'],
                {'a': {}, 'c': {'a': {'b': 1}}},
            ),
            (
                {'name': 'anna', 'name_of_cat': 'pushkin'},
                ['name'],
                {'name_of_cat': 'pushkin'},
            ),
        ]
        for payload, excluded, expected in insert_cases:
            run_case(payload, excluded, expected)

        # (update spec, exclude_fields, expected filtered update spec)
        update_cases = [
            (
                {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}},
                ['a', 'c'],
                {'$set': {'b': 2}},
            ),
            (
                {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}},
                ['a.b.c', 'a.e'],
                {'$set': {'a.b': {'d': 1}, 'a.f': 2}},
            ),
            (
                {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
                ['a.b.c', 'a.b.d', 'a.e'],
                {'$set': {'a.b': {}}},
            ),
            (
                {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}},
                ['a.b'],
                {'$set': {'b': 3}},
            ),
            (
                {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}},
                ['a.b', 'd'],
                {'$set': {'g': 456}},
            ),
            (
                # Every modified path is excluded: the filter returns None.
                {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
                ['a.b', 'a.e'],
                None,
            ),
        ]
        for payload, excluded, expected in update_cases:
            run_case(payload, excluded, expected, op='u')
Esempio n. 6
0
class TestOplogManager(unittest.TestCase):
    """Defines all the testing methods, as well as a method that sets up the
        cluster
    """
    def setUp(self):
        """Start the 'test-oplog-manager' replica set and build the
        OplogThread under test against its primary."""
        repl_set_name = 'test-oplog-manager'
        _, _, self.primary_p = start_replica_set(repl_set_name)
        self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
        self.oplog_coll = self.primary_conn.local['oplog.rs']
        primary_address = '%s:%d' % (mongo_host, self.primary_p)
        self.opman = OplogThread(
            primary_conn=self.primary_conn,
            main_address=primary_address,
            oplog_coll=self.oplog_coll,
            is_sharded=False,
            doc_manager=DocManager(),
            oplog_progress_dict=LockingDict(),
            namespace_set=None,
            auth_key=None,
            auth_username=None,
            repl_set=repl_set_name
        )

    def tearDown(self):
        """Join the OplogThread, close the client and kill the replica set."""
        repl_set_name = 'test-oplog-manager'
        try:
            self.opman.join()
        except RuntimeError:
            # The OplogThread may never have been started by the test;
            # in that case there is nothing to join.
            pass
        self.primary_conn.close()
        kill_replica_set(repl_set_name)

    def test_get_oplog_cursor(self):
        """Exercise get_oplog_cursor with no timestamp, with the newest
        timestamp, and with a timestamp from the middle of the oplog."""
        test_coll = self.primary_conn["test"]["test"]

        # No timestamp: the cursor covers every oplog entry.
        full_cursor = self.opman.get_oplog_cursor(None)
        self.assertEqual(full_cursor.count(),
                         self.primary_conn["local"]["oplog.rs"].count())

        # Newest timestamp: only the entry for the document just inserted
        # is at/after it.
        inserted = {"ts": bson.Timestamp(1000, 0), "i": 1}
        test_coll.insert(inserted)
        newest_ts = self.opman.get_last_oplog_timestamp()
        tail_cursor = self.opman.get_oplog_cursor(newest_ts)
        self.assertNotEqual(tail_cursor, None)
        self.assertEqual(tail_cursor.count(), 1)
        entry_id = next(tail_cursor)['o']['_id']
        self.assertEqual(test_coll.find_one(entry_id), inserted)

        # Pivot timestamp with many entries on either side of it.
        test_coll.insert({"i": i} for i in range(2, 1002))
        ordered_oplog = self.oplog_coll.find(
            sort=[("ts", pymongo.ASCENDING)])

        # startup + insert + 1000 inserts
        expected_total = 2 + 1000
        self.assertEqual(ordered_oplog.count(), expected_total)
        pivot_entry = ordered_oplog.skip(400).limit(1)[0]

        pivot_cursor = self.opman.get_oplog_cursor(pivot_entry["ts"])
        self.assertEqual(pivot_cursor.count(), 2 + 1000 - 400)

    def test_get_last_oplog_timestamp(self):
        """Check get_last_oplog_timestamp on an empty and a populated oplog."""
        # Point the thread at an empty collection to stand in for an
        # empty oplog.
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.get_last_oplog_timestamp(), None)

        # Back to the real oplog, then generate 1000 insert entries.
        real_oplog = self.primary_conn["local"]["oplog.rs"]
        self.opman.oplog = real_oplog
        target = self.primary_conn["test"]["test"]
        for value in range(500, 1500):
            target.insert({"i": value})

        newest_first = real_oplog.find().sort("$natural", pymongo.DESCENDING)
        newest_entry = newest_first.limit(1)[0]
        self.assertEqual(self.opman.get_last_oplog_timestamp(),
                         newest_entry["ts"])

    def test_dump_collection(self):
        """Check dump_collection's return value and its effect on the
        first doc manager.

        Cases:

        1. empty oplog: dump_collection returns None
        2. non-empty oplog: dump_collection returns the last oplog
           timestamp and the doc manager holds the 1000 inserted documents
        """
        # Case 1: an empty collection stands in for the oplog.
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.assertEqual(self.opman.dump_collection(), None)

        # Case 2: real oplog with 1000 freshly inserted documents.
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        target = self.primary_conn["test"]["test"]
        for value in range(500, 1500):
            target.insert({"i": value})
        newest_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(newest_ts, self.opman.dump_collection())
        dumped = self.opman.doc_managers[0]._search()
        self.assertEqual(len(dumped), 1000)

    def test_dump_collection_with_error(self):
        """Test the dump_collection method with invalid documents.

        Cases:

        1. non-empty oplog, continue_on_error=True, invalid documents

        Ten of the 100 inserted documents (a = 50..59) carry the
        '_upsert_exception' marker; the test expects the other 90 to reach
        the doc manager.
        """

        # non-empty oplog, continue_on_error=True, invalid documents
        self.opman.continue_on_error = True
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]

        docs = [{'a': i} for i in range(100)]
        for i in range(50, 60):
            docs[i]['_upsert_exception'] = True
        self.primary_conn['test']['test'].insert(docs)

        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        docs = self.opman.doc_managers[0]._search()
        # Dicts are not orderable on Python 3, so sort explicitly on the
        # field that is compared below.
        docs.sort(key=lambda doc: doc['a'])

        self.assertEqual(len(docs), 90)
        # range objects cannot be concatenated with "+" on Python 3;
        # build real lists first (works on Python 2 as well).
        expected_a = list(range(0, 50)) + list(range(60, 100))
        for doc, correct_a in zip(docs, expected_a):
            self.assertEqual(doc['a'], correct_a)

    def test_init_cursor(self):
        """Test the init_cursor method

        Cases, in the order they are exercised below:

        1. no last checkpoint, no collection dump, nothing in oplog
        2. no last checkpoint, collection dump enabled, nothing in oplog
        3. no last checkpoint, collection dump enabled, stuff in oplog
        4. no last checkpoint, no collection dump, stuff in oplog
        5. last checkpoint exists
        6. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        # Any entries the cursor yields must be 'n' ops
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["test"]["test"]
        # Insert then remove: the collection ends up empty but the oplog
        # has recorded both operations
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        # NOTE(review): presumably waits for the oplog to settle before the
        # last timestamp is read -- confirm why 3 seconds is needed
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, last_ts)
        # The checkpoint must also be stored in the shared progress dict,
        # keyed by the string form of the oplog collection
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # (a fresh LockingDict discards the checkpoint stored above)
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        # Skip everything but the final entry, which must be the insert
        # performed just above
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)['o']['i'], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        # Two consecutive oplog entries: entry[0] becomes the stored
        # checkpoint, entry[1] is the entry that follows it
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        # The first entry yielded must be the one right after the checkpoint
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)],
                             entry[0]["ts"])

        # Last checkpoint is behind
        # (Timestamp(1, 0) is used as a checkpoint older than the oplog)
        progress = LockingDict()
        progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman.checkpoint)

    def test_filter_fields(self):
        """Check that dump_collection only passes the configured fields on
        to the doc manager."""
        wanted = ["a", "b", "c"]
        unwanted = ["d", "e", "f"]
        target_docman = self.opman.doc_managers[0]
        collection = self.opman.main_connection['test']['test']

        # Restrict replication to the wanted fields.
        self.opman.fields = wanted

        # The stored document carries the unwanted fields as well.
        collection.insert(
            {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1})

        def document_stored():
            return collection.count() == 1

        assert_soon(document_stored)
        self.opman.dump_collection()

        dumped_keys = target_docman._search()[0].keys()
        for kept, dropped in zip(wanted, unwanted):
            self.assertIn(kept, dumped_keys)
            self.assertNotIn(dropped, dumped_keys)

    def test_namespace_mapping(self):
        """Test mapping of namespaces

        Cases (upsert/delete/update of documents):

        1. in namespace set, mapping provided
        2. outside of namespace set, mapping provided
        """

        source_ns = ["test.test1", "test.test2"]
        phony_ns = ["test.phony1", "test.phony2"]
        dest_mapping = {
            "test.test1": "test.test1_dest",
            "test.test2": "test.test2_dest"
        }
        self.opman.dest_mapping = dest_mapping
        self.opman.namespace_set = source_ns
        docman = self.opman.doc_managers[0]

        def update_complete():
            # True as soon as ANY replicated document carries the update.
            # Every document must be inspected (not just the first one),
            # and an empty result must yield False rather than None.
            return any(d.get("weakness") == "kryptonite"
                       for d in docman._search())

        # start replicating
        self.opman.start()

        base_doc = {"_id": 1, "name": "superman"}

        # doc in namespace set
        for ns in source_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)

            assert_soon(lambda: len(docman._search()) == 1)
            # The document must appear under the mapped namespace only
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1}, {"$set": {
                    "weakness": "kryptonite"
                }})

            assert_soon(update_complete)
            self.assertEqual(docman._search()[0]["ns"], dest_mapping[ns])
            bad = [d for d in docman._search() if d["ns"] == ns]
            self.assertEqual(len(bad), 0)

            # test delete
            self.primary_conn[db][coll].remove({"_id": 1})
            assert_soon(lambda: len(docman._search()) == 0)
            bad = [d for d in docman._search() if d["ns"] == dest_mapping[ns]]
            self.assertEqual(len(bad), 0)

            # cleanup
            self.primary_conn[db][coll].remove()
            self.opman.doc_managers[0]._delete()

        # doc not in namespace set
        for ns in phony_ns:
            db, coll = ns.split(".", 1)

            # test insert
            self.primary_conn[db][coll].insert(base_doc)
            # Nothing is expected to arrive, so there is no condition to
            # poll for; wait a fixed interval instead.
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)
            # test update
            self.primary_conn[db][coll].update(
                {"_id": 1}, {"$set": {
                    "weakness": "kryptonite"
                }})
            time.sleep(1)
            self.assertEqual(len(docman._search()), 0)

    def test_many_targets(self):
        """Check that a single OplogThread replicates to every one of
        several doc managers.
        """
        targets = [DocManager(), DocManager(), DocManager()]
        self.opman.doc_managers = targets
        failure_msg = ("OplogThread should be able to replicate to "
                       "multiple targets")

        def replicated_total():
            # Number of documents summed over all targets.
            return sum(len(target._search()) for target in targets)

        # start replicating
        self.opman.start()
        collection = self.primary_conn["test"]["test"]
        collection.insert({"name": "kermit", "color": "green"})
        collection.insert({"name": "elmo", "color": "firetruck red"})

        # Two documents on each of the three targets.
        assert_soon(lambda: replicated_total() == 6, failure_msg)

        collection.remove({"name": "elmo"})

        # One document left on each of the three targets.
        assert_soon(lambda: replicated_total() == 3, failure_msg)
        for target in targets:
            self.assertEqual(target._search()[0]["name"], "kermit")

    def test_filter_oplog_entry(self):
        """Check filter_oplog_entry on insert and update entries for
        several values of ``opman.fields``."""
        # Entries come from factories because filter_oplog_entry modifies
        # the entry it receives in-place.
        def make_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def make_update():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1}
            }

        def run_filter(fields, entry):
            self.opman.fields = fields
            return self.opman.filter_oplog_entry(entry)

        # Case 0: insert op, no fields provided
        result = run_filter(None, make_insert())
        self.assertEqual(result, make_insert())

        # Case 1: insert op, fields provided
        result = run_filter(['a', 'b'], make_insert())
        self.assertEqual(result['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(['d', 'e', 'f'], make_insert())
        self.assertEqual(result['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        result = run_filter(None, make_update())
        self.assertEqual(result, make_update())

        # Case 4: update op, fields provided
        result = run_filter(['a', 'c'], make_update())
        self.assertNotIn('b', result['o']['$set'])
        self.assertIn('a', result['o']['$set'])
        self.assertEqual(result['o']['$unset'], make_update()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        result = run_filter(['c'], make_update())
        self.assertNotIn('$set', result['o'])
        self.assertEqual(result['o']['$unset'], make_update()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(['a', 'b'], make_update())
        self.assertNotIn('$unset', result['o'])
        self.assertEqual(result['o']['$set'], make_update()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        result = run_filter(['d', 'e', 'f'], make_update())
        self.assertEqual(result, None)
    def test_fields_constructor(self):
        """Check the field state of OplogThreads built with various
        ``fields`` constructor arguments, and the inserts they filter."""
        extras = ['extra1', 'extra2']

        def build(**field_kwargs):
            return OplogThread(
                primary_client=self.primary_conn,
                doc_managers=(DocManager(),),
                oplog_progress_dict=LockingDict(),
                dest_mapping_stru=self.dest_mapping_stru,
                **field_kwargs
            )

        def filter_insert(thread, keys):
            # Insert entry whose document maps every key to 1.
            entry = {'op': 'i', 'o': dict.fromkeys(keys, 1)}
            return thread.filter_oplog_entry(entry)['o']

        # Test with "_id" field in constructor
        fields = ["_id", "title", "content", "author"]
        opman = build(fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        filtered = filter_insert(opman, fields + extras)
        self.assertEqual(dict.fromkeys(fields, 1), filtered)

        # Test without "_id" field in constructor
        fields = ["title", "content", "author"]
        opman = build(fields=fields)
        # "_id" is expected in the thread's fields although it was not
        # passed in.
        fields.append('_id')
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        filtered = filter_insert(opman, fields + extras)
        self.assertEqual(dict.fromkeys(fields, 1), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        opman = build(fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        filtered = filter_insert(opman, fields + extras)
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        opman = build()
        self._check_fields(opman, [], [], None)
        all_keys = ['_id'] + extras
        filtered = filter_insert(opman, all_keys)
        self.assertEqual(dict.fromkeys(all_keys, 1), filtered)
    def test_fields(self):
        """Check ``fields`` / ``_fields`` of OplogThreads built with various
        ``fields`` constructor arguments, and the inserts they filter."""
        extras = ['extra1', 'extra2']

        def build(**field_kwargs):
            return OplogThread(primary_client=self.primary_conn,
                               doc_managers=(DocManager(), ),
                               oplog_progress_dict=LockingDict(),
                               **field_kwargs)

        def filter_insert(thread, keys):
            # Insert entry whose document maps every key to 1.
            entry = {'op': 'i', 'o': dict.fromkeys(keys, 1)}
            return thread.filter_oplog_entry(entry)['o']

        # Test with, then without, the "_id" field in the constructor
        for fields in (["_id", "title", "content", "author"],
                       ["title", "content", "author"]):
            opman = build(fields=fields)
            self.assertEqual(set(fields), opman._fields)
            self.assertEqual(sorted(fields), sorted(opman.fields))
            filtered = filter_insert(opman, fields + extras)
            self.assertEqual(dict.fromkeys(fields, 1), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        opman = build(fields=fields)
        self.assertEqual(set(fields), opman._fields)
        self.assertEqual(fields, opman.fields)
        filtered = filter_insert(opman, fields + extras)
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        opman = build()
        self.assertEqual(set([]), opman._fields)
        self.assertEqual(None, opman.fields)
        all_keys = ['_id'] + extras
        filtered = filter_insert(opman, all_keys)
        self.assertEqual(dict.fromkeys(all_keys, 1), filtered)
class TestFilterFields(unittest.TestCase):

    @classmethod
    def setUpClass(cls):
        """Start a ReplicaSetSingle and keep its client and oplog collection
        on the class for all tests."""
        cls.repl_set = ReplicaSetSingle().start()
        client = cls.repl_set.client()
        cls.primary_conn = client
        cls.oplog_coll = client.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        """Drop the "test" database, close the client and stop the replica
        set started in setUpClass."""
        client = cls.primary_conn
        client.drop_database("test")
        close_client(client)
        cls.repl_set.stop()

    def setUp(self):
        """Build a fresh DestMapping and OplogThread for each test."""
        self.dest_mapping_stru = DestMapping([], [], {})
        thread_kwargs = dict(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=self.dest_mapping_stru,
        )
        self.opman = OplogThread(**thread_kwargs)

    def tearDown(self):
        """Join the OplogThread if the test started it."""
        try:
            self.opman.join()
        except RuntimeError:
            pass  # the OplogThread may never have been started

    def _check_fields(self, opman, fields, exclude_fields, projection):
        if fields:
            self.assertEqual(sorted(opman.fields), sorted(fields))
            self.assertEqual(opman._fields, set(fields))
        else:
            self.assertEqual(opman.fields, None)
            self.assertEqual(opman._fields, set([]))
        if exclude_fields:
            self.assertEqual(sorted(opman.exclude_fields),
                             sorted(exclude_fields))
            self.assertEqual(opman._exclude_fields, set(exclude_fields))
        else:
            self.assertEqual(opman.exclude_fields, None)
            self.assertEqual(opman._exclude_fields, set([]))

        self.assertEqual(opman._projection, projection)

    def test_filter_fields(self):
        """Check that dump_collection only passes the configured fields on
        to the doc manager."""
        wanted = ["a", "b", "c"]
        unwanted = ["d", "e", "f"]
        target_docman = self.opman.doc_managers[0]
        collection = self.opman.primary_client['test']['test']

        # Restrict replication to the wanted fields.
        self.opman.fields = wanted

        # The stored document carries the unwanted fields as well.
        collection.insert_one(
            {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1})

        def document_stored():
            return collection.count() == 1

        assert_soon(document_stored)
        self.opman.dump_collection()

        dumped_keys = target_docman._search()[0].keys()
        for kept, dropped in zip(wanted, unwanted):
            self.assertIn(kept, dumped_keys)
            self.assertNotIn(dropped, dumped_keys)

    def test_filter_exclude_oplog_entry(self):
        """Check filter_oplog_entry and ``_projection`` for several values
        of ``opman.exclude_fields``."""
        # Entries come from factories because filter_oplog_entry modifies
        # the entry it receives in-place.
        def make_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def make_update():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1}
            }

        def run_filter(excluded, entry):
            self.opman.exclude_fields = excluded
            return self.opman.filter_oplog_entry(entry)

        # Case 0: insert op, no fields provided
        result = run_filter(None, make_insert())
        self.assertEqual(result, make_insert())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert op, fields provided
        result = run_filter(['c'], make_insert())
        self.assertEqual(result['o'], {'_id': 0, 'a': 1, 'b': 2})
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(['a', 'b', 'c'], make_insert())
        self.assertEqual(result['o'], {'_id': 0})
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 3: update op, no fields provided
        result = run_filter(None, make_update())
        self.assertEqual(result, make_update())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update op, fields provided
        result = run_filter(['b'], make_update())
        self.assertNotIn('b', result['o']['$set'])
        self.assertIn('a', result['o']['$set'])
        self.assertEqual(result['o']['$unset'], make_update()['o']['$unset'])
        self.assertEqual({'b': 0}, self.opman._projection)

        # Case 5: update op, fields provided, empty $set
        result = run_filter(['a', 'b'], make_update())
        self.assertNotIn('$set', result['o'])
        self.assertEqual(result['o']['$unset'], make_update()['o']['$unset'])
        self.assertEqual({'a': 0, 'b': 0}, self.opman._projection)

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(['c'], make_update())
        self.assertNotIn('$unset', result['o'])
        self.assertEqual(result['o']['$set'], make_update()['o']['$set'])
        self.assertEqual({'c': 0}, self.opman._projection)

        # Case 7: update op, fields provided, entry is nullified
        result = run_filter(['a', 'b', 'c'], make_update())
        self.assertEqual(result, None)
        self.assertEqual({'a': 0, 'b': 0, 'c': 0}, self.opman._projection)

        # Case 8: update op, fields provided, replacement
        replacement = {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}}
        result = run_filter(['d', 'e', 'f'], replacement)
        self.assertEqual(
            result, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})
        self.assertEqual({'d': 0, 'e': 0, 'f': 0}, self.opman._projection)

    def test_filter_oplog_entry(self):
        """Exercise filter_oplog_entry with include-style ``fields``.

        Walks through insert, $set/$unset update and replacement entries
        while ``fields`` is unset, partially matching, or matching nothing,
        checking both the filtered entry and ``_projection`` each time.
        """

        # Entries come from factories because filter_oplog_entry modifies
        # the entry it receives in-place.
        def make_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def make_update():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def run_filter(fields, entry):
            # Install the include list first, then filter the entry.
            self.opman.fields = fields
            return self.opman.filter_oplog_entry(entry)

        # Case 0: insert, no include list -> entry and projection untouched.
        result = run_filter(None, make_insert())
        self.assertEqual(result, make_insert())
        self.assertEqual(None, self.opman._projection)

        # Case 1: insert, include list matches part of the document.
        result = run_filter(["a", "b"], make_insert())
        self.assertEqual(result["o"], {"_id": 0, "a": 1, "b": 2})
        self.assertEqual({"_id": 1, "a": 1, "b": 1}, self.opman._projection)

        # Case 2: insert, include list matches nothing -> only _id is left.
        result = run_filter(["d", "e", "f"], make_insert())
        self.assertEqual(result["o"], {"_id": 0})
        self.assertEqual(
            {"_id": 1, "d": 1, "e": 1, "f": 1}, self.opman._projection
        )

        # Case 3: update, no include list.
        result = run_filter(None, make_update())
        self.assertEqual(result, make_update())
        self.assertEqual(None, self.opman._projection)

        # Case 4: update, include list keeps part of $set and all of $unset.
        result = run_filter(["a", "c"], make_update())
        self.assertNotIn("b", result["o"]["$set"])
        self.assertIn("a", result["o"]["$set"])
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])
        self.assertEqual({"_id": 1, "a": 1, "c": 1}, self.opman._projection)

        # Case 5: update, $set ends up empty and is dropped.
        result = run_filter(["c"], make_update())
        self.assertNotIn("$set", result["o"])
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])
        self.assertEqual({"_id": 1, "c": 1}, self.opman._projection)

        # Case 6: update, $unset ends up empty and is dropped.
        result = run_filter(["a", "b"], make_update())
        self.assertNotIn("$unset", result["o"])
        self.assertEqual(result["o"]["$set"], make_update()["o"]["$set"])
        self.assertEqual({"_id": 1, "a": 1, "b": 1}, self.opman._projection)

        # Case 7: update, nothing survives -> the whole entry becomes None.
        result = run_filter(["d", "e", "f"], make_update())
        self.assertEqual(result, None)
        self.assertEqual(
            {"_id": 1, "d": 1, "e": 1, "f": 1}, self.opman._projection
        )

        # Case 8: update that replaces the whole document.
        replacement = {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}
        result = run_filter(["a", "b", "c"], replacement)
        self.assertEqual(result, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})
        self.assertEqual(
            {"_id": 1, "a": 1, "b": 1, "c": 1}, self.opman._projection
        )

    def test_exclude_fields_constructor(self):
        """Check exclude_fields handling when passed to the constructor.

        Each scenario builds a new OplogThread, hands the expected state to
        ``self._check_fields`` (presumably include list, exclude list and
        projection -- the helper is defined outside this block) and then
        filters an insert entry to see which keys survive.
        """

        def make_opman(excluded):
            # Only exclude_fields varies between the scenarios below.
            return OplogThread(
                primary_client=self.primary_conn,
                doc_managers=(DocManager(),),
                oplog_progress_dict=LockingDict(),
                dest_mapping_stru=self.dest_mapping_stru,
                exclude_fields=excluded
            )

        def filter_insert(opman, keys):
            # Filter an insert whose document holds 1 under every key.
            entry = {'op': 'i', 'o': dict.fromkeys(keys, 1)}
            return opman.filter_oplog_entry(entry)['o']

        # Test with the "_id" field in exclude_fields. The expectations are
        # built after dropping "_id" from the local list, i.e. the thread is
        # expected not to exclude it.
        exclude_fields = ["_id", "title", "content", "author"]
        opman = make_opman(exclude_fields)
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        filtered = filter_insert(opman, exclude_fields + ['extra1', 'extra2'])
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = make_opman(exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Build the document keys from this scenario's own exclude list
        # rather than from whatever the previous scenario left behind.
        filtered = filter_insert(opman, exclude_fields + ['extra1', 'extra2'])
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = make_opman(exclude_fields)
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = filter_insert(opman, extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

        # Test with nothing set for exclude_fields
        opman = make_opman(None)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = filter_insert(opman, extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

    def test_fields_constructor(self):
        """Check include ``fields`` handling when passed to the constructor.

        Each scenario builds a new OplogThread, verifies its state through
        ``self._check_fields`` and filters an insert entry carrying two
        extra keys to confirm which keys are kept.
        """

        def build(**field_options):
            # Everything except the field options is shared by all scenarios.
            return OplogThread(
                primary_client=self.primary_conn,
                doc_managers=(DocManager(),),
                oplog_progress_dict=LockingDict(),
                dest_mapping_stru=self.dest_mapping_stru,
                **field_options
            )

        def surviving(thread, keys):
            # Filter an insert whose document holds 1 under every key.
            document = dict.fromkeys(keys, 1)
            return thread.filter_oplog_entry({'op': 'i', 'o': document})['o']

        # "_id" listed explicitly alongside other fields.
        fields = ["_id", "title", "content", "author"]
        opman = build(fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        kept = surviving(opman, fields + ['extra1', 'extra2'])
        self.assertEqual(dict.fromkeys(fields, 1), kept)

        # "_id" omitted: it is appended to the expectation only after the
        # thread has been built, so the thread must have added it itself.
        fields = ["title", "content", "author"]
        opman = build(fields=fields)
        fields.append('_id')
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        kept = surviving(opman, fields + ['extra1', 'extra2'])
        self.assertEqual(dict.fromkeys(fields, 1), kept)

        # "_id" as the only field.
        fields = ["_id"]
        opman = build(fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        kept = surviving(opman, fields + ['extra1', 'extra2'])
        self.assertEqual({'_id': 1}, kept)

        # No fields argument at all: nothing is filtered out.
        opman = build()
        self._check_fields(opman, [], [], None)
        all_keys = ['_id', 'extra1', 'extra2']
        kept = surviving(opman, all_keys)
        self.assertEqual(dict.fromkeys(all_keys, 1), kept)

    def test_exclude_fields_attr(self):
        """Check exclude_fields handling when set through the attribute.

        Each scenario assigns ``self.opman.exclude_fields``, verifies the
        resulting state through ``self._check_fields`` and filters an insert
        entry to see which keys survive.
        """

        def filter_insert(keys):
            # Filter an insert whose document holds 1 under every key.
            entry = {'op': 'i', 'o': dict.fromkeys(keys, 1)}
            return self.opman.filter_oplog_entry(entry)['o']

        # Test with the "_id" field in exclude_fields.
        # Assign the list while it still contains "_id" so the setter really
        # sees it; only the expectations are built without it.
        exclude_fields = ["_id", "title", "content", "author"]
        self.opman.exclude_fields = exclude_fields
        exclude_fields.remove('_id')
        self._check_fields(self.opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        filtered = filter_insert(exclude_fields + ['extra1', 'extra2'])
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Build the document keys from this scenario's own exclude list
        # rather than from whatever the previous scenario left behind.
        filtered = filter_insert(exclude_fields + ['extra1', 'extra2'])
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        self.opman.exclude_fields = exclude_fields
        self._check_fields(self.opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = filter_insert(extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

        # Test with nothing set for exclude_fields
        self.opman.exclude_fields = None
        self._check_fields(self.opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = filter_insert(extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

    def test_fields_attr(self):
        """Check include ``fields`` handling when set through the attribute.

        Each scenario assigns ``self.opman.fields``, verifies the resulting
        state through ``self._check_fields`` and filters an insert entry
        carrying two extra keys to confirm which keys are kept.
        """

        def surviving(keys):
            # Filter an insert whose document holds 1 under every key.
            document = dict.fromkeys(keys, 1)
            entry = {'op': 'i', 'o': document}
            return self.opman.filter_oplog_entry(entry)['o']

        # "_id" listed explicitly alongside other fields.
        fields = ["_id", "title", "content", "author"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [], dict.fromkeys(fields, 1))
        kept = surviving(fields + ['extra1', 'extra2'])
        self.assertEqual(dict.fromkeys(fields, 1), kept)

        # "_id" omitted: it is appended to the expectation only after the
        # assignment, so the setter must have added it itself.
        fields = ["title", "content", "author"]
        self.opman.fields = fields
        fields.append('_id')
        self._check_fields(self.opman, fields, [], dict.fromkeys(fields, 1))
        kept = surviving(fields + ['extra1', 'extra2'])
        self.assertEqual(dict.fromkeys(fields, 1), kept)

        # "_id" as the only field.
        fields = ["_id"]
        self.opman.fields = fields
        self._check_fields(self.opman, fields, [], dict.fromkeys(fields, 1))
        kept = surviving(fields + ['extra1', 'extra2'])
        self.assertEqual({'_id': 1}, kept)

        # Fields reset to None: nothing is filtered out.
        self.opman.fields = None
        self._check_fields(self.opman, [], [], None)
        all_keys = ['_id', 'extra1', 'extra2']
        kept = surviving(all_keys)
        self.assertEqual(dict.fromkeys(all_keys, 1), kept)

    def test_nested_fields(self):
        """Check dotted (nested) paths in ``fields`` for inserts and updates."""

        def expect_filtered(body, included, expected, op='i'):
            # '_id' is appended to the local list only after the assignment:
            # the assertions expect the thread to have added it on its own.
            self.opman.fields = included
            included.append('_id')
            self.assertEqual(set(included), self.opman._fields)
            self.assertEqual(sorted(included), sorted(self.opman.fields))
            entry = self.opman.filter_oplog_entry({'op': op, 'o': body})
            # A fully filtered-out entry comes back as None.
            outcome = entry if entry is None else entry['o']
            self.assertEqual(outcome, expected)

        # --- insert entries ---
        expect_filtered(
            {'name': 'Han Solo', 'a': {'b': {}}},
            ['name', 'a.b.c'],
            {'name': 'Han Solo'})

        expect_filtered(
            {'a': {'b': {'c': 2, 'e': 3}, 'e': 5}, 'b': 2, 'c': {'g': 1}},
            ['a.b.c', 'a.e'],
            {'a': {'b': {'c': 2}, 'e': 5}})

        expect_filtered(
            {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
             'b': 2,
             'c': {'g': 1},
             '_id': 1},
            ['a.b.c', 'a.e'],
            {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1})

        # Documents that are expected to pass through unchanged.
        doc = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        expect_filtered(doc, ['a.b', '-a'], doc.copy())

        doc = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        expect_filtered(doc, ['a', '-a.-b'], doc.copy())

        # The same document and expectation are reused for both field lists.
        doc = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
               '_id': 1}
        unchanged = doc.copy()
        expect_filtered(doc, ['a.b', '-a'], unchanged)
        expect_filtered(doc, ['a', '-a.-b'], unchanged)

        expect_filtered({'test': 1}, ['doesnt_exist'], {})

        doc = {'a': {'b': 1}, 'b': {'a': 1}}
        expect_filtered(doc, ['a.b', 'b.a'], doc.copy())

        expect_filtered(
            {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
            ['a.b'],
            {'a': {'b': {'a': {'b': 1}}}})

        expect_filtered(
            {'name': 'anna', 'name_of_cat': 'pushkin'},
            ['name'],
            {'name': 'anna'})

        # --- update entries ---
        expect_filtered(
            {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}},
            ['a', 'c'],
            {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}},
            op='u')

        expect_filtered(
            {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}},
            ['a.b.c', 'a.e'],
            {'$set': {'a.b': {'c': 3}, 'a.e': 1}},
            op='u')

        expect_filtered(
            {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}},
            ['a.b'],
            {'$set': {'a.b.1': 1, 'a.b.2': 2}},
            op='u')

        expect_filtered(
            {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
            ['a.b.e'],
            None,
            op='u')

    def test_nested_exclude_fields(self):
        """Check dotted (nested) paths in ``exclude_fields``."""

        def expect_filtered(body, excluded, expected, op='i'):
            # '_id' is dropped from the local list only after the assignment:
            # the assertions expect the thread to have ignored it on its own.
            self.opman.exclude_fields = excluded
            if '_id' in excluded:
                excluded.remove('_id')
            self.assertEqual(set(excluded), self.opman._exclude_fields)
            self.assertEqual(sorted(excluded),
                             sorted(self.opman.exclude_fields))
            entry = self.opman.filter_oplog_entry({'op': op, 'o': body})
            # A fully filtered-out entry comes back as None.
            outcome = entry if entry is None else entry['o']
            self.assertEqual(outcome, expected)

        # --- insert entries ---
        expect_filtered(
            {'a': {'b': {'c': {'d': 0, 'e': 1}}}},
            ['a.b.c.d'],
            {'a': {'b': {'c': {'e': 1}}}})

        expect_filtered(
            {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}},
            ['a.b.c.d.e.f'],
            {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}})

        expect_filtered({'a': 1}, ['a'], {})

        expect_filtered(
            {'a': {'b': {'c': 2, 'e': 3}, 'e': 5}, 'b': 2, 'c': {'g': 1}},
            ['a.b.c', 'a.e'],
            {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}})

        expect_filtered(
            {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
             'b': 2,
             'c': {'g': 1},
             '_id': 1},
            ['a.b.c', 'a.e', '_id'],
            {'a': {'b': {'e': 3}}, 'b': 2, 'c': {'g': 1}, '_id': 1})

        expect_filtered(
            {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}},
            ['a.b', '-a'],
            {'a': {}})

        expect_filtered(
            {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}},
            ['a', '-a.-b'],
            {'-a': {}})

        expect_filtered(
            {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}, '_id': 1},
            ['a.b', '-a'],
            {'_id': 1, 'a': {}})

        # Paths that match nothing leave the document as it was.
        doc = {'test': 1}
        expect_filtered(doc, ['doesnt_exist'], doc.copy())

        doc = {'test': 1}
        expect_filtered(doc, ['test.doesnt_exist'], doc.copy())

        expect_filtered(
            {'a': {'b': 1}, 'b': {'a': 1}},
            ['a.b', 'b.a'],
            {'a': {}, 'b': {}})

        expect_filtered(
            {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}},
            ['a.b'],
            {'a': {}, 'c': {'a': {'b': 1}}})

        expect_filtered(
            {'name': 'anna', 'name_of_cat': 'pushkin'},
            ['name'],
            {'name_of_cat': 'pushkin'})

        # --- update entries ---
        expect_filtered(
            {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}},
            ['a', 'c'],
            {'$set': {'b': 2}},
            op='u')

        expect_filtered(
            {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}},
            ['a.b.c', 'a.e'],
            {'$set': {'a.b': {'d': 1}, 'a.f': 2}},
            op='u')

        expect_filtered(
            {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
            ['a.b.c', 'a.b.d', 'a.e'],
            {'$set': {'a.b': {}}},
            op='u')

        expect_filtered(
            {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}},
            ['a.b'],
            {'$set': {'b': 3}},
            op='u')

        expect_filtered(
            {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}},
            ['a.b', 'd'],
            {'$set': {'g': 456}},
            op='u')

        expect_filtered(
            {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}},
            ['a.b', 'a.e'],
            None,
            op='u')

    def test_fields_and_exclude(self):
        """Check that ``fields`` and ``exclude_fields`` cannot be combined.

        Covers the constructor as well as attribute assignment on a thread
        that already has the other option set; the conflicting cases are
        expected to raise ``errors.InvalidConfiguration``.
        """
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        def build(**field_options):
            # Everything except the field options is shared by all cases.
            return OplogThread(
                primary_client=self.primary_conn,
                doc_managers=(DocManager(),),
                oplog_progress_dict=LockingDict(),
                dest_mapping_stru=self.dest_mapping_stru,
                **field_options
            )

        # Both options explicitly None in the constructor.
        opman = build(fields=None, exclude_fields=None)
        self._check_fields(opman, [], [], None)

        # Only exclude_fields set.
        opman = build(fields=None, exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Assigning fields (even None) while exclude_fields is set must fail.
        with self.assertRaises(errors.InvalidConfiguration):
            opman.fields = fields
        with self.assertRaises(errors.InvalidConfiguration):
            opman.fields = None

        # Only fields set.
        opman = build(exclude_fields=None, fields=fields)
        self._check_fields(opman, fields, [], dict.fromkeys(fields, 1))
        # Assigning exclude_fields (even None) while fields is set must fail.
        with self.assertRaises(errors.InvalidConfiguration):
            opman.exclude_fields = exclude_fields
        with self.assertRaises(errors.InvalidConfiguration):
            opman.exclude_fields = None

        # Both options given to the constructor at once (positional form).
        doc_managers = (DocManager(),)
        progress = LockingDict()
        with self.assertRaises(errors.InvalidConfiguration):
            OplogThread(
                self.primary_conn,
                doc_managers,
                progress,
                self.dest_mapping_stru,
                fields=fields,
                exclude_fields=exclude_fields)
    def test_exclude_fields_constructor(self):
        """Check exclude_fields handling when passed to the constructor.

        Each scenario builds a new OplogThread, hands the expected state to
        ``self._check_fields`` (presumably include list, exclude list and
        projection -- the helper is defined outside this block) and then
        filters an insert entry to see which keys survive.
        """
        # NOTE(review): a method with this same name is defined earlier in
        # this file; if both live in the same class, this later definition
        # replaces the earlier one -- confirm which one is meant to run.

        def make_opman(excluded):
            # Only exclude_fields varies between the scenarios below.
            return OplogThread(
                primary_client=self.primary_conn,
                doc_managers=(DocManager(),),
                oplog_progress_dict=LockingDict(),
                dest_mapping_stru=self.dest_mapping_stru,
                exclude_fields=excluded
            )

        def filter_insert(opman, keys):
            # Filter an insert whose document holds 1 under every key.
            entry = {'op': 'i', 'o': dict.fromkeys(keys, 1)}
            return opman.filter_oplog_entry(entry)['o']

        # Test with the "_id" field in exclude_fields. The expectations are
        # built after dropping "_id" from the local list, i.e. the thread is
        # expected not to exclude it.
        exclude_fields = ["_id", "title", "content", "author"]
        opman = make_opman(exclude_fields)
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        filtered = filter_insert(opman, exclude_fields + ['extra1', 'extra2'])
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = make_opman(exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict.fromkeys(exclude_fields, 0))
        # Build the document keys from this scenario's own exclude list
        # rather than from whatever the previous scenario left behind.
        filtered = filter_insert(opman, exclude_fields + ['extra1', 'extra2'])
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = make_opman(exclude_fields)
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = filter_insert(opman, extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)

        # Test with nothing set for exclude_fields
        opman = make_opman(None)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = filter_insert(opman, extra_fields)
        self.assertEqual(dict.fromkeys(extra_fields, 1), filtered)
class TestFilterFields(unittest.TestCase):
    """Tests for the include/exclude field filtering done by OplogThread."""

    @classmethod
    def setUpClass(cls):
        """Start the ReplicaSetSingle fixture and open a client to it."""
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local["oplog.rs"]

    @classmethod
    def tearDownClass(cls):
        """Drop the "test" database, close the client and stop the fixture."""
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        """Create a fresh OplogThread with a default NamespaceConfig."""
        self.namespace_config = NamespaceConfig()
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=self.namespace_config,
        )

    def tearDown(self):
        """Join the OplogThread, tolerating a thread that never ran."""
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        """Replace the thread's NamespaceConfig with one built from include_fields."""
        self.opman.namespace_config = NamespaceConfig(include_fields=fields)

    def reset_exclude_fields(self, fields):
        """Replace the thread's NamespaceConfig with one built from exclude_fields."""
        self.opman.namespace_config = NamespaceConfig(exclude_fields=fields)

    def test_filter_fields(self):
        """dump_collection() must keep the included fields and drop the rest."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.reset_include_fields(include_fields)
        # Documents have more than just these fields
        doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        collection = conn["test"]["test"]
        collection.insert_one(doc)
        # NOTE(review): if this is a PyMongo collection, count() is deprecated
        # since PyMongo 3.7 (removed in 4.0); count_documents({}) is the
        # replacement once older PyMongo versions no longer need support.
        assert_soon(lambda: collection.count() == 1)
        self.opman.dump_collection()

        keys = docman._search()[0].keys()
        # Two loops instead of zip(): zip() would silently stop at the shorter
        # list if the two lists ever ended up with different lengths.
        for field in include_fields:
            self.assertIn(field, keys)
        for field in exclude_fields:
            self.assertNotIn(field, keys)

    def test_filter_exclude_oplog_entry(self):
        """filter_oplog_entry() with exclude_fields on insert and update ops."""
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def filter_doc(document, fields):
            # "_id" is never passed as an excluded field. Build a new list so
            # the caller's list is left untouched.
            if fields:
                fields = [field for field in fields if field != "_id"]
            return self.opman.filter_oplog_entry(document, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ["c"])
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ["a", "b", "c"])
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ["b"])
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ["a", "b"])
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ["c"])
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ["a", "b", "c"])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc(
            {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}, ["d", "e", "f"]
        )
        self.assertEqual(filtered, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_filter_oplog_entry(self):
        """filter_oplog_entry() with include_fields on insert and update ops."""
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def update_op():
            return {
                "op": "u",
                "o": {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}},
                "o2": {"_id": 1},
            }

        def filter_doc(document, fields):
            # "_id" is always part of a non-empty include list. Build a new
            # list so the caller's list is left untouched.
            if fields and "_id" not in fields:
                fields = fields + ["_id"]
            return self.opman.filter_oplog_entry(document, include_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ["a", "b"])
        self.assertEqual(filtered["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ["d", "e", "f"])
        self.assertEqual(filtered["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ["a", "c"])
        self.assertNotIn("b", filtered["o"]["$set"])
        self.assertIn("a", filtered["o"]["$set"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ["c"])
        self.assertNotIn("$set", filtered["o"])
        self.assertEqual(filtered["o"]["$unset"], update_op()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ["a", "b"])
        self.assertNotIn("$unset", filtered["o"])
        self.assertEqual(filtered["o"]["$set"], update_op()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ["d", "e", "f"])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc(
            {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}, ["a", "b", "c"]
        )
        self.assertEqual(filtered, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_nested_fields(self):
        """filter_oplog_entry() with dotted (nested) include_fields."""
        # Local import: only this test needs deep copies of nested documents.
        import copy

        def check_nested(document, fields, filtered_document, op="i"):
            # Filter {"op": op, "o": document} and compare the resulting "o"
            # (or None when the whole entry is dropped) to filtered_document.
            if "_id" not in fields:
                # New list: do not mutate the list owned by the caller.
                fields = fields + ["_id"]
            filtered_result = self.opman.filter_oplog_entry(
                {"op": op, "o": document}, include_fields=fields
            )
            if filtered_result is not None:
                filtered_result = filtered_result["o"]
            self.assertEqual(filtered_result, filtered_document)

        document = {"name": "Han Solo", "a": {"b": {}}}
        fields = ["name", "a.b.c"]
        filtered_document = {"name": "Han Solo"}
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}}
        fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"c": 2}, "e": 5}}
        check_nested(document, fields, filtered_document)

        document = {
            "a": {"b": {"c": 2, "e": 3}, "e": 5},
            "b": 2,
            "c": {"g": 1},
            "_id": 1,
        }
        fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"c": 2}, "e": 5}, "_id": 1}
        check_nested(document, fields, filtered_document)

        # The expected documents below are deep copies: a shallow copy would
        # share the nested dicts with the input, so an in-place change to a
        # nested level would alter the expectation too and go unnoticed.
        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        fields = ["a.b", "-a"]
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        fields = ["a", "-a.-b"]
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}, "_id": 1}
        fields = ["a.b", "-a"]
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)
        fields = ["a", "-a.-b"]
        check_nested(document, fields, filtered_document)

        document = {"test": 1}
        fields = ["doesnt_exist"]
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": 1}, "b": {"a": 1}}
        fields = ["a.b", "b.a"]
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}}
        fields = ["a.b"]
        filtered_document = {"a": {"b": {"a": {"b": 1}}}}
        check_nested(document, fields, filtered_document)

        document = {"name": "anna", "name_of_cat": "pushkin"}
        fields = ["name"]
        filtered_document = {"name": "anna"}
        check_nested(document, fields, filtered_document)

        update = {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}}
        fields = ["a", "c"]
        filtered_update = {"$set": {"a.b": 1, "a.c": 3, "c": {"b": 3}}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}}
        fields = ["a.b.c", "a.e"]
        filtered_update = {"$set": {"a.b": {"c": 3}, "a.e": 1}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}}
        fields = ["a.b"]
        filtered_update = {"$set": {"a.b.1": 1, "a.b.2": 2}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}
        fields = ["a.b.e"]
        filtered_update = None
        check_nested(update, fields, filtered_update, op="u")

    def test_nested_exclude_fields(self):
        """filter_oplog_entry() with dotted (nested) exclude_fields."""

        def check_nested(document, exclude_fields, filtered_document, op="i"):
            # Filter {"op": op, "o": document} and compare the resulting "o"
            # (or None when the whole entry is dropped) to filtered_document.
            # "_id" is stripped into a new list so the caller's list is not
            # mutated.
            exclude_fields = [field for field in exclude_fields if field != "_id"]
            filtered_result = self.opman.filter_oplog_entry(
                {"op": op, "o": document}, exclude_fields=exclude_fields
            )
            if filtered_result is not None:
                filtered_result = filtered_result["o"]
            self.assertEqual(filtered_result, filtered_document)

        document = {"a": {"b": {"c": {"d": 0, "e": 1}}}}
        exclude_fields = ["a.b.c.d"]
        filtered_document = {"a": {"b": {"c": {"e": 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"-a": 0, "d": {"e": {"f": 1}}}}}}
        exclude_fields = ["a.b.c.d.e.f"]
        filtered_document = {"a": {"b": {"c": {"-a": 0, "d": {"e": {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": 1}
        exclude_fields = ["a"]
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}}
        exclude_fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {
            "a": {"b": {"c": 2, "e": 3}, "e": 5},
            "b": 2,
            "c": {"g": 1},
            "_id": 1,
        }
        exclude_fields = ["a.b.c", "a.e", "_id"]
        filtered_document = {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}, "_id": 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        exclude_fields = ["a.b", "-a"]
        filtered_document = {"a": {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        exclude_fields = ["a", "-a.-b"]
        filtered_document = {"-a": {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}, "_id": 1}
        exclude_fields = ["a.b", "-a"]
        filtered_document = {"_id": 1, "a": {}}
        check_nested(document, exclude_fields, filtered_document)

        # Flat documents: a shallow copy is a fully independent expectation.
        document = {"test": 1}
        exclude_fields = ["doesnt_exist"]
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {"test": 1}
        exclude_fields = ["test.doesnt_exist"]
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": 1}, "b": {"a": 1}}
        exclude_fields = ["a.b", "b.a"]
        filtered_document = {"a": {}, "b": {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}}
        exclude_fields = ["a.b"]
        filtered_document = {"a": {}, "c": {"a": {"b": 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {"name": "anna", "name_of_cat": "pushkin"}
        exclude_fields = ["name"]
        filtered_document = {"name_of_cat": "pushkin"}
        check_nested(document, exclude_fields, filtered_document)

        update = {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}}
        exclude_fields = ["a", "c"]
        filtered_update = {"$set": {"b": 2}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}}
        exclude_fields = ["a.b.c", "a.e"]
        filtered_update = {"$set": {"a.b": {"d": 1}, "a.f": 2}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}
        exclude_fields = ["a.b.c", "a.b.d", "a.e"]
        filtered_update = {"$set": {"a.b": {}}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}}
        exclude_fields = ["a.b"]
        filtered_update = {"$set": {"b": 3}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b.c": 42, "d.e.f": 123, "g": 456}}
        exclude_fields = ["a.b", "d"]
        filtered_update = {"$set": {"g": 456}}
        check_nested(update, exclude_fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}
        exclude_fields = ["a.b", "a.e"]
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op="u")
Esempio n. 12
0
class TestFilterFields(unittest.TestCase):
    """Tests for the include/exclude field filtering done by OplogThread."""

    @classmethod
    def setUpClass(cls):
        """Start the ReplicaSetSingle fixture and open a client to it."""
        cls.repl_set = ReplicaSetSingle().start()
        cls.primary_conn = cls.repl_set.client()
        cls.oplog_coll = cls.primary_conn.local['oplog.rs']

    @classmethod
    def tearDownClass(cls):
        """Drop the "test" database, close the client and stop the fixture."""
        cls.primary_conn.drop_database("test")
        close_client(cls.primary_conn)
        cls.repl_set.stop()

    def setUp(self):
        """Create a fresh OplogThread with a default NamespaceConfig."""
        self.namespace_config = NamespaceConfig()
        self.opman = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=self.namespace_config
        )

    def tearDown(self):
        """Join the OplogThread, tolerating a thread that never ran."""
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        """Replace the thread's NamespaceConfig with one built from include_fields."""
        self.opman.namespace_config = NamespaceConfig(include_fields=fields)

    def reset_exclude_fields(self, fields):
        """Replace the thread's NamespaceConfig with one built from exclude_fields."""
        self.opman.namespace_config = NamespaceConfig(exclude_fields=fields)

    def test_filter_fields(self):
        """dump_collection() must keep the included fields and drop the rest."""
        docman = self.opman.doc_managers[0]
        conn = self.opman.primary_client

        include_fields = ["a", "b", "c"]
        exclude_fields = ["d", "e", "f"]

        # Set fields to care about
        self.reset_include_fields(include_fields)
        # Documents have more than just these fields
        doc = {
            "a": 1, "b": 2, "c": 3,
            "d": 4, "e": 5, "f": 6,
            "_id": 1
        }
        collection = conn['test']['test']
        collection.insert_one(doc)
        # NOTE(review): if this is a PyMongo collection, count() is deprecated
        # since PyMongo 3.7 (removed in 4.0); count_documents({}) is the
        # replacement once older PyMongo versions no longer need support.
        assert_soon(lambda: collection.count() == 1)
        self.opman.dump_collection()

        keys = docman._search()[0].keys()
        # Two loops instead of zip(): zip() would silently stop at the shorter
        # list if the two lists ever ended up with different lengths.
        for field in include_fields:
            self.assertIn(field, keys)
        for field in exclude_fields:
            self.assertNotIn(field, keys)

    def test_filter_exclude_oplog_entry(self):
        """filter_oplog_entry() with exclude_fields on insert and update ops."""
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {
                "op": "i",
                "o": {"_id": 0, "a": 1, "b": 2, "c": 3}
            }

        def update_op():
            return {
                "op": "u",
                "o": {
                    "$set": {"a": 4, "b": 5},
                    "$unset": {"c": True}
                },
                "o2": {"_id": 1}
            }

        def filter_doc(document, fields):
            # "_id" is never passed as an excluded field. Build a new list so
            # the caller's list is left untouched.
            if fields:
                fields = [field for field in fields if field != '_id']
            return self.opman.filter_oplog_entry(
                document, exclude_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['c'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['a', 'b', 'c'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['b'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['a', 'b', 'c'])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['d', 'e', 'f'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_filter_oplog_entry(self):
        """filter_oplog_entry() with include_fields on insert and update ops."""
        # Test oplog entries: these are callables, since
        # filter_oplog_entry modifies the oplog entry in-place
        def insert_op():
            return {
                "op": "i",
                "o": {"_id": 0, "a": 1, "b": 2, "c": 3}
            }

        def update_op():
            return {
                "op": "u",
                "o": {
                    "$set": {"a": 4, "b": 5},
                    "$unset": {"c": True}
                },
                "o2": {"_id": 1}
            }

        def filter_doc(document, fields):
            # "_id" is always part of a non-empty include list. Build a new
            # list so the caller's list is left untouched.
            if fields and '_id' not in fields:
                fields = fields + ['_id']
            return self.opman.filter_oplog_entry(
                document, include_fields=fields)

        # Case 0: insert op, no fields provided
        filtered = filter_doc(insert_op(), None)
        self.assertEqual(filtered, insert_op())

        # Case 1: insert op, fields provided
        filtered = filter_doc(insert_op(), ['a', 'b'])
        self.assertEqual(filtered['o'], {'_id': 0, 'a': 1, 'b': 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        filtered = filter_doc(insert_op(), ['d', 'e', 'f'])
        self.assertEqual(filtered['o'], {'_id': 0})

        # Case 3: update op, no fields provided
        filtered = filter_doc(update_op(), None)
        self.assertEqual(filtered, update_op())

        # Case 4: update op, fields provided
        filtered = filter_doc(update_op(), ['a', 'c'])
        self.assertNotIn('b', filtered['o']['$set'])
        self.assertIn('a', filtered['o']['$set'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 5: update op, fields provided, empty $set
        filtered = filter_doc(update_op(), ['c'])
        self.assertNotIn('$set', filtered['o'])
        self.assertEqual(filtered['o']['$unset'], update_op()['o']['$unset'])

        # Case 6: update op, fields provided, empty $unset
        filtered = filter_doc(update_op(), ['a', 'b'])
        self.assertNotIn('$unset', filtered['o'])
        self.assertEqual(filtered['o']['$set'], update_op()['o']['$set'])

        # Case 7: update op, fields provided, entry is nullified
        filtered = filter_doc(update_op(), ['d', 'e', 'f'])
        self.assertIsNone(filtered)

        # Case 8: update op, fields provided, replacement
        filtered = filter_doc({
            'op': 'u',
            'o': {'a': 1, 'b': 2, 'c': 3, 'd': 4}
        }, ['a', 'b', 'c'])
        self.assertEqual(
            filtered, {'op': 'u', 'o': {'a': 1, 'b': 2, 'c': 3}})

    def test_nested_fields(self):
        """filter_oplog_entry() with dotted (nested) include_fields."""
        # Local import: only this test needs deep copies of nested documents.
        import copy

        def check_nested(document, fields, filtered_document, op='i'):
            # Filter {'op': op, 'o': document} and compare the resulting 'o'
            # (or None when the whole entry is dropped) to filtered_document.
            if '_id' not in fields:
                # New list: do not mutate the list owned by the caller.
                fields = fields + ['_id']
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, include_fields=fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'name': 'Han Solo', 'a': {'b': {}}}
        fields = ['name', 'a.b.c']
        filtered_document = {'name': 'Han Solo'}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'c': 2}, 'e': 5}, '_id': 1}
        check_nested(document, fields, filtered_document)

        # The expected documents below are deep copies: a shallow copy would
        # share the nested dicts with the input, so an in-place change to a
        # nested level would alter the expectation too and go unnoticed.
        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a.b', '-a']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}}}
        fields = ['a', '-a.-b']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}}, '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        fields = ['a.b', '-a']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)
        fields = ['a', '-a.-b']
        check_nested(document, fields, filtered_document)

        document = {'test': 1}
        fields = ['doesnt_exist']
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        fields = ['a.b', 'b.a']
        filtered_document = copy.deepcopy(document)
        check_nested(document, fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        fields = ['a.b']
        filtered_document = {'a': {'b': {'a': {'b': 1}}}}
        check_nested(document, fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        fields = ['name']
        filtered_document = {'name': 'anna'}
        check_nested(document, fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        fields = ['a', 'c']
        filtered_update = {'$set': {'a.b': 1, 'a.c': 3, 'c': {'b': 3}}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'c': 3}, 'a.e': 1}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        fields = ['a.b']
        filtered_update = {'$set': {'a.b.1': 1, 'a.b.2': 2}}
        check_nested(update, fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        fields = ['a.b.e']
        filtered_update = None
        check_nested(update, fields, filtered_update, op='u')

    def test_nested_exclude_fields(self):
        """filter_oplog_entry() with dotted (nested) exclude_fields."""

        def check_nested(document, exclude_fields, filtered_document, op='i'):
            # Filter {'op': op, 'o': document} and compare the resulting 'o'
            # (or None when the whole entry is dropped) to filtered_document.
            # '_id' is stripped into a new list so the caller's list is not
            # mutated.
            exclude_fields = [
                field for field in exclude_fields if field != '_id']
            filtered_result = self.opman.filter_oplog_entry(
                {'op': op, 'o': document}, exclude_fields=exclude_fields)
            if filtered_result is not None:
                filtered_result = filtered_result['o']
            self.assertEqual(filtered_result, filtered_document)

        document = {'a': {'b': {'c': {'d': 0, 'e': 1}}}}
        exclude_fields = ['a.b.c.d']
        filtered_document = {'a': {'b': {'c': {'e': 1}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {'f': 1}}}}}}
        exclude_fields = ['a.b.c.d.e.f']
        filtered_document = {'a': {'b': {'c': {'-a': 0, 'd': {'e': {}}}}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': 1}
        exclude_fields = ['a']
        filtered_document = {}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2,
                             'c': {'g': 1}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': 2, 'e': 3}, 'e': 5},
                    'b': 2,
                    'c': {'g': 1},
                    '_id': 1}
        exclude_fields = ['a.b.c', 'a.e', '_id']
        filtered_document = {'a': {'b': {'e': 3}},
                             'b': 2, 'c': {'g': 1},
                             '_id': 1}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}}}
        exclude_fields = ['a', '-a.-b']
        filtered_document = {'-a': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'c': {'d': 1}}},
                    '-a': {'-b': {'-c': 2}},
                    '_id': 1}
        exclude_fields = ['a.b', '-a']
        filtered_document = {'_id': 1, 'a': {}}
        check_nested(document, exclude_fields, filtered_document)

        # Flat documents: a shallow copy is a fully independent expectation.
        document = {'test': 1}
        exclude_fields = ['doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'test': 1}
        exclude_fields = ['test.doesnt_exist']
        filtered_document = document.copy()
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': 1}, 'b': {'a': 1}}
        exclude_fields = ['a.b', 'b.a']
        filtered_document = {'a': {}, 'b': {}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'a': {'b': {'a': {'b': 1}}}, 'c': {'a': {'b': 1}}}
        exclude_fields = ['a.b']
        filtered_document = {'a': {}, 'c': {'a': {'b': 1}}}
        check_nested(document, exclude_fields, filtered_document)

        document = {'name': 'anna', 'name_of_cat': 'pushkin'}
        exclude_fields = ['name']
        filtered_document = {'name_of_cat': 'pushkin'}
        check_nested(document, exclude_fields, filtered_document)

        update = {'$set': {'a.b': 1, 'a.c': 3, 'b': 2, 'c': {'b': 3}}}
        exclude_fields = ['a', 'c']
        filtered_update = {'$set': {'b': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1, 'a.f': 2}}
        exclude_fields = ['a.b.c', 'a.e']
        filtered_update = {'$set': {'a.b': {'d': 1}, 'a.f': 2}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b.c', 'a.b.d', 'a.e']
        filtered_update = {'$set': {'a.b': {}}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.1': 1, 'a.b.2': 2, 'b': 3}}
        exclude_fields = ['a.b']
        filtered_update = {'$set': {'b': 3}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b.c': 42, 'd.e.f': 123, 'g': 456}}
        exclude_fields = ['a.b', 'd']
        filtered_update = {'$set': {'g': 456}}
        check_nested(update, exclude_fields, filtered_update, op='u')

        update = {'$set': {'a.b': {'c': 3, 'd': 1}, 'a.e': 1}}
        exclude_fields = ['a.b', 'a.e']
        filtered_update = None
        check_nested(update, exclude_fields, filtered_update, op='u')
class TestFilterFields(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Start a ``ReplicaSetSingle`` and keep shared handles on the class.

        Stores the started replica set, a client obtained from it, and that
        client's ``local["oplog.rs"]`` collection as class attributes.
        """
        cls.repl_set = replica_set = ReplicaSetSingle().start()
        cls.primary_conn = client = replica_set.client()
        cls.oplog_coll = client.local["oplog.rs"]

    @classmethod
    def tearDownClass(cls):
        """Drop the ``test`` database, close the client, stop the replica set."""
        client = cls.primary_conn
        client.drop_database("test")
        close_client(client)
        cls.repl_set.stop()

    def setUp(self):
        """Build a fresh ``NamespaceConfig`` and ``OplogThread`` for each test.

        The thread is only constructed here; it is not started.
        """
        self.namespace_config = NamespaceConfig()
        thread_options = {
            "primary_client": self.primary_conn,
            "doc_managers": (DocManager(), ),
            "oplog_progress_dict": LockingDict(),
            "namespace_config": self.namespace_config,
        }
        self.opman = OplogThread(**thread_options)

    def tearDown(self):
        try:
            self.opman.join()
        except RuntimeError:
            # OplogThread may not have been started
            pass

    def reset_include_fields(self, fields):
        """Give the oplog thread a new ``NamespaceConfig(include_fields=fields)``.

        Only ``self.opman.namespace_config`` is replaced;
        ``self.namespace_config`` is left untouched.
        """
        replacement = NamespaceConfig(include_fields=fields)
        self.opman.namespace_config = replacement

    def reset_exclude_fields(self, fields):
        """Give the oplog thread a new ``NamespaceConfig(exclude_fields=fields)``.

        Only ``self.opman.namespace_config`` is replaced;
        ``self.namespace_config`` is left untouched.
        """
        replacement = NamespaceConfig(exclude_fields=fields)
        self.opman.namespace_config = replacement

    def test_filter_fields(self):
        """Dump a collection with include fields set and check the result keys.

        A document carrying both included and non-included fields is inserted
        into ``test.test``; after ``dump_collection`` the first document
        returned by the doc manager must contain the included keys only.
        """
        doc_manager = self.opman.doc_managers[0]
        client = self.opman.primary_client

        wanted = ["a", "b", "c"]
        unwanted = ["d", "e", "f"]

        # Restrict replication to the wanted fields
        self.reset_include_fields(wanted)
        # The source document has more than just those fields
        source_doc = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "_id": 1}
        collection = client["test"]["test"]
        collection.insert_one(source_doc)
        # NOTE(review): Collection.count() is deprecated since PyMongo 3.7 and
        # removed in 4.0 -- confirm the supported driver range before moving
        # to count_documents({}).
        assert_soon(lambda: collection.count() == 1)
        self.opman.dump_collection()

        replicated_keys = doc_manager._search()[0].keys()
        for kept, dropped in zip(wanted, unwanted):
            self.assertIn(kept, replicated_keys)
            self.assertNotIn(dropped, replicated_keys)

    def test_filter_exclude_oplog_entry(self):
        """Exercise ``filter_oplog_entry`` with ``exclude_fields``.

        Covers insert entries, ``$set``/``$unset`` update entries and a
        replacement-style update whose ``o`` holds plain fields.
        """
        # Each case builds its entry from scratch, since
        # filter_oplog_entry modifies the oplog entry in-place
        def make_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def make_update():
            modifiers = {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}}
            return {"op": "u", "o": modifiers, "o2": {"_id": 1}}

        def run_filter(entry, excluded):
            # "_id" is taken out of the exclusion list before filtering.
            if excluded and "_id" in excluded:
                excluded.remove("_id")
            return self.opman.filter_oplog_entry(
                entry, exclude_fields=excluded)

        # Case 0: insert op, no fields provided
        self.assertEqual(run_filter(make_insert(), None), make_insert())

        # Case 1: insert op, fields provided
        result = run_filter(make_insert(), ["c"])
        self.assertEqual(result["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(make_insert(), ["a", "b", "c"])
        self.assertEqual(result["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        self.assertEqual(run_filter(make_update(), None), make_update())

        # Case 4: update op, fields provided
        result = run_filter(make_update(), ["b"])
        remaining_set = result["o"]["$set"]
        self.assertNotIn("b", remaining_set)
        self.assertIn("a", remaining_set)
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        result = run_filter(make_update(), ["a", "b"])
        self.assertNotIn("$set", result["o"])
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(make_update(), ["c"])
        self.assertNotIn("$unset", result["o"])
        self.assertEqual(result["o"]["$set"], make_update()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        self.assertEqual(run_filter(make_update(), ["a", "b", "c"]), None)

        # Case 8: update op, fields provided, replacement
        replacement = {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}
        result = run_filter(replacement, ["d", "e", "f"])
        self.assertEqual(result, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_filter_oplog_entry(self):
        """Exercise ``filter_oplog_entry`` with ``include_fields``.

        Covers insert entries, ``$set``/``$unset`` update entries and a
        replacement-style update whose ``o`` holds plain fields.
        """
        # Each case builds its entry from scratch, since
        # filter_oplog_entry modifies the oplog entry in-place
        def make_insert():
            return {"op": "i", "o": {"_id": 0, "a": 1, "b": 2, "c": 3}}

        def make_update():
            modifiers = {"$set": {"a": 4, "b": 5}, "$unset": {"c": True}}
            return {"op": "u", "o": modifiers, "o2": {"_id": 1}}

        def run_filter(entry, included):
            # A non-empty inclusion list always gets "_id" added to it.
            if included and "_id" not in included:
                included.append("_id")
            return self.opman.filter_oplog_entry(
                entry, include_fields=included)

        # Case 0: insert op, no fields provided
        self.assertEqual(run_filter(make_insert(), None), make_insert())

        # Case 1: insert op, fields provided
        result = run_filter(make_insert(), ["a", "b"])
        self.assertEqual(result["o"], {"_id": 0, "a": 1, "b": 2})

        # Case 2: insert op, fields provided, doc becomes empty except for _id
        result = run_filter(make_insert(), ["d", "e", "f"])
        self.assertEqual(result["o"], {"_id": 0})

        # Case 3: update op, no fields provided
        self.assertEqual(run_filter(make_update(), None), make_update())

        # Case 4: update op, fields provided
        result = run_filter(make_update(), ["a", "c"])
        remaining_set = result["o"]["$set"]
        self.assertNotIn("b", remaining_set)
        self.assertIn("a", remaining_set)
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 5: update op, fields provided, empty $set
        result = run_filter(make_update(), ["c"])
        self.assertNotIn("$set", result["o"])
        self.assertEqual(result["o"]["$unset"], make_update()["o"]["$unset"])

        # Case 6: update op, fields provided, empty $unset
        result = run_filter(make_update(), ["a", "b"])
        self.assertNotIn("$unset", result["o"])
        self.assertEqual(result["o"]["$set"], make_update()["o"]["$set"])

        # Case 7: update op, fields provided, entry is nullified
        self.assertEqual(run_filter(make_update(), ["d", "e", "f"]), None)

        # Case 8: update op, fields provided, replacement
        replacement = {"op": "u", "o": {"a": 1, "b": 2, "c": 3, "d": 4}}
        result = run_filter(replacement, ["a", "b", "c"])
        self.assertEqual(result, {"op": "u", "o": {"a": 1, "b": 2, "c": 3}})

    def test_nested_fields(self):
        """Check ``filter_oplog_entry`` with dotted (nested) ``include_fields``.

        Insert documents and ``$set`` updates are filtered, and the resulting
        ``o`` payload (or ``None`` when no entry is returned) is compared
        with the expected value.
        """
        from copy import deepcopy

        def check_nested(document, fields, filtered_document, op="i"):
            """Filter ``document`` with ``fields`` and compare the result.

            ``document`` and ``fields`` are copied before filtering:
            filter_oplog_entry may modify the entry in-place, while several
            cases below derive their expected value from a shallow
            ``document.copy()`` or reuse the same ``document`` for a second
            check. Without the copies, an in-place change to a nested dict
            would alter the expected value too and hide the failure.
            """
            include = list(fields)
            if "_id" not in include:
                include.append("_id")
            filtered_result = self.opman.filter_oplog_entry(
                {
                    "op": op,
                    "o": deepcopy(document)
                }, include_fields=include)
            if filtered_result is not None:
                filtered_result = filtered_result["o"]
            self.assertEqual(filtered_result, filtered_document)

        # An included path that does not exist in the document is dropped.
        document = {"name": "Han Solo", "a": {"b": {}}}
        fields = ["name", "a.b.c"]
        filtered_document = {"name": "Han Solo"}
        check_nested(document, fields, filtered_document)

        document = {
            "a": {
                "b": {
                    "c": 2,
                    "e": 3
                },
                "e": 5
            },
            "b": 2,
            "c": {
                "g": 1
            }
        }
        fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"c": 2}, "e": 5}}
        check_nested(document, fields, filtered_document)

        # Same document with an "_id": it is kept although not listed.
        document = {
            "a": {
                "b": {
                    "c": 2,
                    "e": 3
                },
                "e": 5
            },
            "b": 2,
            "c": {
                "g": 1
            },
            "_id": 1,
        }
        fields = ["a.b.c", "a.e"]
        filtered_document = {"a": {"b": {"c": 2}, "e": 5}, "_id": 1}
        check_nested(document, fields, filtered_document)

        # Included prefixes cover the whole document: nothing is removed.
        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        fields = ["a.b", "-a"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}}
        fields = ["a", "-a.-b"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        document = {
            "a": {
                "b": {
                    "c": {
                        "d": 1
                    }
                }
            },
            "-a": {
                "-b": {
                    "-c": 2
                }
            },
            "_id": 1
        }

        # The same document and expected value serve two field lists.
        fields = ["a.b", "-a"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)
        fields = ["a", "-a.-b"]
        check_nested(document, fields, filtered_document)

        document = {"test": 1}
        fields = ["doesnt_exist"]
        filtered_document = {}
        check_nested(document, fields, filtered_document)

        document = {"a": {"b": 1}, "b": {"a": 1}}
        fields = ["a.b", "b.a"]
        filtered_document = document.copy()
        check_nested(document, fields, filtered_document)

        # "a.b" only matches at the top level, not below "c".
        document = {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}}
        fields = ["a.b"]
        filtered_document = {"a": {"b": {"a": {"b": 1}}}}
        check_nested(document, fields, filtered_document)

        # A field name that merely starts with "name" is not included.
        document = {"name": "anna", "name_of_cat": "pushkin"}
        fields = ["name"]
        filtered_document = {"name": "anna"}
        check_nested(document, fields, filtered_document)

        # Update entries: keys inside "$set" may themselves be dotted paths.
        update = {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}}
        fields = ["a", "c"]
        filtered_update = {"$set": {"a.b": 1, "a.c": 3, "c": {"b": 3}}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}}
        fields = ["a.b.c", "a.e"]
        filtered_update = {"$set": {"a.b": {"c": 3}, "a.e": 1}}
        check_nested(update, fields, filtered_update, op="u")

        update = {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}}
        fields = ["a.b"]
        filtered_update = {"$set": {"a.b.1": 1, "a.b.2": 2}}
        check_nested(update, fields, filtered_update, op="u")

        # Nothing in the update matches: no entry is returned at all.
        update = {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}}
        fields = ["a.b.e"]
        filtered_update = None
        check_nested(update, fields, filtered_update, op="u")

    def test_nested_exclude_fields(self):
        """Check ``filter_oplog_entry`` with dotted (nested) ``exclude_fields``.

        Each case is a ``(document, exclude_fields, expected)`` triple. The
        insert cases run first with op ``"i"``, then the ``$set`` update
        cases with op ``"u"``. ``expected`` is the filtered ``o`` payload, or
        ``None`` when no entry is returned.
        """
        def verify(op, document, excluded, expected):
            # "_id" is taken out of the exclusion list before filtering.
            if "_id" in excluded:
                excluded.remove("_id")
            entry = {"op": op, "o": document}
            outcome = self.opman.filter_oplog_entry(
                entry, exclude_fields=excluded)
            payload = None if outcome is None else outcome["o"]
            self.assertEqual(payload, expected)

        insert_cases = [
            (
                {"a": {"b": {"c": {"d": 0, "e": 1}}}},
                ["a.b.c.d"],
                {"a": {"b": {"c": {"e": 1}}}},
            ),
            (
                {"a": {"b": {"c": {"-a": 0, "d": {"e": {"f": 1}}}}}},
                ["a.b.c.d.e.f"],
                {"a": {"b": {"c": {"-a": 0, "d": {"e": {}}}}}},
            ),
            ({"a": 1}, ["a"], {}),
            (
                {"a": {"b": {"c": 2, "e": 3}, "e": 5}, "b": 2, "c": {"g": 1}},
                ["a.b.c", "a.e"],
                {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}},
            ),
            # "_id" appears in the exclusion list, yet is expected to stay.
            (
                {
                    "a": {"b": {"c": 2, "e": 3}, "e": 5},
                    "b": 2,
                    "c": {"g": 1},
                    "_id": 1,
                },
                ["a.b.c", "a.e", "_id"],
                {"a": {"b": {"e": 3}}, "b": 2, "c": {"g": 1}, "_id": 1},
            ),
            (
                {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}},
                ["a.b", "-a"],
                {"a": {}},
            ),
            (
                {"a": {"b": {"c": {"d": 1}}}, "-a": {"-b": {"-c": 2}}},
                ["a", "-a.-b"],
                {"-a": {}},
            ),
            (
                {
                    "a": {"b": {"c": {"d": 1}}},
                    "-a": {"-b": {"-c": 2}},
                    "_id": 1,
                },
                ["a.b", "-a"],
                {"_id": 1, "a": {}},
            ),
            # Excluding paths that are absent leaves the document as it was.
            ({"test": 1}, ["doesnt_exist"], {"test": 1}),
            ({"test": 1}, ["test.doesnt_exist"], {"test": 1}),
            (
                {"a": {"b": 1}, "b": {"a": 1}},
                ["a.b", "b.a"],
                {"a": {}, "b": {}},
            ),
            # "a.b" only matches at the top level, not below "c".
            (
                {"a": {"b": {"a": {"b": 1}}}, "c": {"a": {"b": 1}}},
                ["a.b"],
                {"a": {}, "c": {"a": {"b": 1}}},
            ),
            # A field name that merely starts with "name" is not excluded.
            (
                {"name": "anna", "name_of_cat": "pushkin"},
                ["name"],
                {"name_of_cat": "pushkin"},
            ),
        ]

        update_cases = [
            (
                {"$set": {"a.b": 1, "a.c": 3, "b": 2, "c": {"b": 3}}},
                ["a", "c"],
                {"$set": {"b": 2}},
            ),
            (
                {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1, "a.f": 2}},
                ["a.b.c", "a.e"],
                {"$set": {"a.b": {"d": 1}, "a.f": 2}},
            ),
            (
                {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}},
                ["a.b.c", "a.b.d", "a.e"],
                {"$set": {"a.b": {}}},
            ),
            (
                {"$set": {"a.b.1": 1, "a.b.2": 2, "b": 3}},
                ["a.b"],
                {"$set": {"b": 3}},
            ),
            (
                {"$set": {"a.b.c": 42, "d.e.f": 123, "g": 456}},
                ["a.b", "d"],
                {"$set": {"g": 456}},
            ),
            # Everything in the update is excluded: no entry is returned.
            (
                {"$set": {"a.b": {"c": 3, "d": 1}, "a.e": 1}},
                ["a.b", "a.e"],
                None,
            ),
        ]

        for document, excluded, expected in insert_cases:
            verify("i", document, excluded, expected)
        for update, excluded, expected in update_cases:
            verify("u", update, excluded, expected)