def test_rename_validation(self):
    """Check that conflicting or malformed rename rules are rejected."""

    def assert_rejected(options):
        # Building the config with these options must fail outright.
        with self.assertRaises(errors.InvalidConfiguration):
            NamespaceConfig(namespace_options=options)

    # Two plain namespaces may not be renamed onto one shared target.
    assert_rejected({
        "db1.col1": "newdb.newcol",
        "db2.col1": "newdb.newcol",
    })

    # The same restriction applies to wildcard renames sharing a target.
    assert_rejected({
        "db*.col1": "newdb.newcol*",
        "db*.col2": "newdb.newcol*",
    })

    # A collision between wildcard rules is only detected at mapping time.
    overlapping = NamespaceConfig(namespace_options={
        "*.coll": "*.new_coll",
        "db.*": "new_db.*",
    })
    overlapping.map_namespace("new_db.coll")
    with self.assertRaises(errors.InvalidConfiguration):
        # "db.new_coll" should map to "new_db.new_coll", but
        # "new_db.coll" has already been mapped to "new_db.new_coll".
        overlapping.map_namespace("db.new_coll")

    # For the sake of map_db, a wildcard may not move from the database
    # name into the collection name...
    assert_rejected({"db*.col": "new_db.col_*"})

    # ...nor from the collection name into the database name.
    assert_rejected({"db.*": "new_db_*.col"})
def test_fields_validation(self):
    """Check that include and exclude field options cannot be combined."""
    conflicting_kwargs = (
        # includeFields and excludeFields on the very same namespace.
        {
            "namespace_options": {
                "db.col": {"includeFields": ["a"], "excludeFields": ["b"]},
            },
        },
        # Fields included globally, then excluded for one namespace.
        {
            "include_fields": ["a"],
            "namespace_options": {"db.col": {"excludeFields": ["b"]}},
        },
        # Fields excluded globally, then included for one namespace.
        {
            "exclude_fields": ["b"],
            "namespace_options": {"db.col": {"includeFields": ["a"]}},
        },
    )
    for kwargs in conflicting_kwargs:
        with self.assertRaises(errors.InvalidConfiguration):
            NamespaceConfig(**kwargs)
def test_gridfs(self):
    """Check the gridfs flag is set by both gridfs_set and namespace_options."""
    via_gridfs_set = NamespaceConfig(gridfs_set=["db1.*"])
    via_options = NamespaceConfig(
        namespace_options={"db1.*": {"gridfs": True}}
    )
    # Both spellings are expected to behave the same way.
    for config in (via_gridfs_set, via_options):
        self.assertEqual(config.unmap_namespace("db1.col1"), "db1.col1")
        self.assertEqual(config.unmap_namespace("db1.col2"), "db1.col2")
        self.assertEqual(
            config.lookup("db1.col1"),
            Namespace(
                dest_name="db1.col1", source_name="db1.col1", gridfs=True
            ),
        )
        self.assertListEqual(config.map_db("db1"), ["db1"])
        self.assertEqual(config.map_namespace("db1.col1"), "db1.col1")
        self.assertIsNone(config.map_namespace("db2.col4"))
        self.assertTrue(config.lookup("db1.col1").gridfs)
        self.assertEqual(config.gridfs_namespace("db1.col1"), "db1.col1")
        self.assertIsNone(config.gridfs_namespace("not.gridfs"))
def test_override_namespace_options(self):
    """Check gridfs_set and dest_mapping take priority over namespace_options."""
    renames = {
        "override.me": "overridden.1",
        "override.me2": "overridden.2",
    }
    options = {
        "override.me": {
            "rename": "override.me",
            "includeFields": ["_id", "dont_remove"],
        },
        "override.me2": "override.me2",
        "override.me3": {"gridfs": False},
    }
    config = NamespaceConfig(
        namespace_set=["override.me", "override.me2"],
        gridfs_set=["override.me3"],
        dest_mapping=renames,
        namespace_options=options,
    )

    # The rename comes from dest_mapping; includeFields is still honoured.
    first = config.lookup("override.me")
    self.assertEqual(first.dest_name, "overridden.1")
    self.assertEqual(first.include_fields, {"_id", "dont_remove"})

    second = config.lookup("override.me2")
    self.assertEqual(second.dest_name, "overridden.2")
    self.assertFalse(second.include_fields)

    # gridfs_set wins over the explicit {"gridfs": False} option.
    self.assertTrue(config.gridfs_namespace("override.me3"))
def test_projection_exclude_wildcard(self):
    """Check the projection built from exclude_fields on a wildcard namespace."""
    global_only = NamespaceConfig(
        exclude_fields=["_id", "foo", "nested.field"],
        namespace_set=["db.*"],
    )
    per_namespace_only = NamespaceConfig(
        namespace_options={
            "db.*": {"excludeFields": ["_id", "foo", "nested.field"]},
        }
    )
    global_and_per_namespace = NamespaceConfig(
        exclude_fields=["foo", "nested.field"],
        namespace_options={
            "db.*": {"excludeFields": ["_id", "foo", "nested.field"]},
        },
    )

    expected_projection = {"foo": 0, "nested.field": 0}
    for config in (global_only, per_namespace_only, global_and_per_namespace):
        self.assertEqual(config.projection("db.foo"), expected_projection)
        self.assertIsNone(config.projection("ignored.name"))
def test_include_and_exclude(self):
    """Check behaviour when namespaces are included and excluded together."""
    configs = (
        NamespaceConfig(
            ex_namespace_set=["ex.*"],
            namespace_set=["ex.cluded_still", "in.cluded"],
        ),
        NamespaceConfig(
            namespace_options={
                "ex.*": False,
                "ex.cluded_still": True,
                "in.cluded": True,
            }
        ),
        NamespaceConfig(
            ex_namespace_set=["ex.cluded", "ex.cluded_still"],
            namespace_set=["ex.*", "in.cluded"],
        ),
    )
    # "ex.cluded" is excluded; "ex.cluded_still" shows that exclusion takes
    # precedence over inclusion; "also.not.included" shows that namespaces
    # not explicitly included are ignored.
    unmapped = ("ex.cluded", "ex.cluded_still", "also.not.included")
    for config in configs:
        for namespace in unmapped:
            self.assertIsNone(config.map_namespace(namespace))
        self.assertEqual(config.map_namespace("in.cluded"), "in.cluded")
def test_default(self):
    """Check a default config keeps every namespace under its own name."""
    config = NamespaceConfig()
    namespace = "db1.col1"
    self.assertEqual(config.unmap_namespace(namespace), "db1.col1")
    self.assertEqual(config.map_db("db1"), ["db1"])
    self.assertEqual(config.map_namespace(namespace), "db1.col1")
def test_map_db_wildcard(self):
    """Check map_db when one database is renamed into several targets."""
    renames = {
        "db.1_*": "db1.new_*",
        "db.2_*": "db2.new_*",
        "db.3": "new_db.3",
    }
    config = NamespaceConfig(namespace_options=renames)
    # Order is not asserted, so compare as sets.
    mapped_databases = set(config.map_db("db"))
    self.assertEqual(mapped_databases, {"db1", "db2", "new_db"})
def test_include_wildcard_metacharacters(self):
    """Check that metacharacters in a namespace pattern match literally."""
    config = NamespaceConfig(namespace_set=["db&_*.$_^_#_!_[_]_"])
    matching = "db&_foo.$_^_#_!_[_]_"
    self.assertEqual(config.map_namespace(matching), "db&_foo.$_^_#_!_[_]_")
    self.assertIsNone(config.map_namespace("db&.foo"))
def test_unmap_namespace_wildcard(self):
    """Check un-mapping a target namespace that was never explicitly mapped."""
    renames = {"db2.*": "db2.f*", "db_*.foo": "db_new_*.foo"}
    config = NamespaceConfig(namespace_options=renames)

    # Each pair is (renamed namespace, expected original namespace).
    expectations = (
        ("db2.foo", "db2.oo"),
        ("db_new_123.foo", "db_123.foo"),
    )
    for renamed, original in expectations:
        self.assertEqual(config.unmap_namespace(renamed), original)
def test_exclude_wildcard(self):
    """Check excluding namespaces through wildcard patterns."""
    by_exclude_set = NamespaceConfig(ex_namespace_set=["ex.*", "ex2.*"])
    by_options = NamespaceConfig(
        namespace_options={"ex.*": False, "ex2.*": False}
    )
    # A single exclude pattern containing more than one wildcard.
    by_multi_wildcard = NamespaceConfig(ex_namespace_set=["e*.*"])

    for config in (by_exclude_set, by_options, by_multi_wildcard):
        self.assertEqual(config.unmap_namespace("db.col"), "db.col")
        self.assertEqual(config.unmap_namespace("ex.clude"), "ex.clude")
        self.assertEqual(config.map_namespace("db.col"), "db.col")
        for excluded in ("ex.clude", "ex2.clude"):
            self.assertIsNone(config.map_namespace(excluded))
def test_include_plain(self):
    """Check including namespaces that contain no wildcards."""
    config = NamespaceConfig(namespace_set=["db1.col1", "db1.col2"])

    # Un-mapping: included namespaces map to themselves, others to None.
    self.assertEqual(config.unmap_namespace("db1.col1"), "db1.col1")
    self.assertEqual(config.unmap_namespace("db1.col2"), "db1.col2")
    self.assertIsNone(config.unmap_namespace("not.included"))

    # Database mapping.
    self.assertEqual(config.map_db("db1"), ["db1"])
    self.assertEqual(config.map_db("not_included"), [])

    # Forward mapping.
    self.assertEqual(config.map_namespace("db1.col1"), "db1.col1")
    self.assertEqual(config.map_namespace("db1.col2"), "db1.col2")
    self.assertIsNone(config.map_namespace("db1.col4"))
def test_config(self):
    """Check the namespace option in the packaged example config is valid."""
    with importlib_resources.open_text(
        "mongo_connector.service", "config.json"
    ) as config_file:
        example_config = json.load(config_file)
    # Constructing the config is the assertion: it must not raise.
    NamespaceConfig(namespace_options=example_config["__namespaces"])
def test_skipped_oplog_entry_updates_checkpoint(self):
    """Check the checkpoint advances for included and for skipped entries.

    The replica set, the client and the oplog thread are released through
    ``addCleanup`` so that they are torn down even when an assertion or
    ``assert_soon`` fails part-way through the test.
    """
    repl_set = ReplicaSetSingle().start()
    self.addCleanup(repl_set.stop)
    conn = repl_set.client()
    self.addCleanup(conn.close)

    opman = OplogThread(
        primary_client=conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_set=["test.test"]),
    )
    opman.start()
    # Cleanups run in reverse order: join the thread, close the client,
    # then stop the replica set.
    self.addCleanup(opman.join)

    # Insert a document into an included collection
    conn["test"]["test"].insert_one({"test": 1})
    last_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: last_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to non-skipped entry.",
    )
    self.assertEqual(len(opman.doc_managers[0]._search()), 1)

    # Make sure that the oplog thread updates its checkpoint on every
    # oplog entry, including ones for namespaces it does not replicate.
    conn["test"]["ignored"].insert_one({"test": 1})
    last_ts = opman.get_last_oplog_timestamp()
    assert_soon(
        lambda: last_ts == opman.checkpoint,
        "OplogThread never updated checkpoint to skipped entry.",
    )
def setUp(self):
    """Create a fresh progress file, a replica set and an OplogThread."""
    # Start from an empty oplog progress file.
    try:
        os.unlink("oplog.timestamp")
    except OSError:
        pass
    with open("oplog.timestamp", "w"):
        pass

    # Bring up the replica set and open one connection per role.
    self.repl_set = ReplicaSet().start()
    # The replica set as a whole.
    self.main_conn = self.repl_set.client()
    # The primary specifically.
    self.primary_conn = self.repl_set.primary.client()
    # The secondary specifically.
    self.secondary_conn = self.repl_set.secondary.client(
        read_preference=ReadPreference.SECONDARY_PREFERRED
    )

    # Remove data left over from earlier tests.
    self.main_conn.drop_database("test")

    # The oplog thread under test only replicates "test.mc".
    self.opman = OplogThread(
        primary_client=self.main_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_set=["test.mc"]),
    )
def setUp(self):
    """Create an OplogThread backed by a default NamespaceConfig."""
    default_config = NamespaceConfig()
    self.namespace_config = default_config
    # NOTE(review): self.primary_conn is not assigned in this method;
    # presumably it is set up elsewhere in the class - confirm.
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=default_config,
    )
def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
    """Replace the namespace config and OplogThread with fresh instances.

    ``include_ns`` is passed on as ``namespace_set``, ``exclude_ns`` as
    ``ex_namespace_set`` and ``dest_mapping`` as ``namespace_options``.
    """
    fresh_config = NamespaceConfig(
        namespace_set=include_ns,
        ex_namespace_set=exclude_ns,
        namespace_options=dest_mapping,
    )
    self.namespace_config = fresh_config
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=fresh_config,
    )
def test_gridfs_rename_invalid(self):
    """Check that renaming a GridFS collection is rejected."""
    renamed_gridfs = {
        "gridfs.*": {"rename": "new_gridfs.*", "gridfs": True},
    }
    with self.assertRaises(errors.InvalidConfiguration):
        NamespaceConfig(namespace_options=renamed_gridfs)
def test_get_included_databases(self):
    """Check get_included_databases for several configurations."""
    # Configurations for which no database list is reported.
    no_database_list = (
        NamespaceConfig(),
        NamespaceConfig(namespace_options={"db.c": False}),
        NamespaceConfig(namespace_options={"db*.c": True}),
    )
    for config in no_database_list:
        self.assertEqual(config.get_included_databases(), [])

    # Including a plain namespace or a collection wildcard names the
    # database explicitly.
    for options in ({"db.c": True}, {"db.*": True}):
        config = NamespaceConfig(namespace_options=options)
        self.assertEqual(config.get_included_databases(), ["db"])
def test_include_wildcard(self):
    """Check including namespaces through a wildcard pattern."""
    # Four spellings that are expected to behave the same way.
    configs = (
        NamespaceConfig(namespace_set=["db1.*"]),
        NamespaceConfig(namespace_options={"db1.*": {}}),
        NamespaceConfig(namespace_options={"db1.*": True}),
        NamespaceConfig(namespace_options={"db1.*": {"rename": "db1.*"}}),
    )
    for config in configs:
        self.assertEqual(config.unmap_namespace("db1.col1"), "db1.col1")
        self.assertEqual(config.unmap_namespace("db1.col2"), "db1.col2")
        self.assertEqual(
            config.lookup("db1.col1"),
            Namespace(dest_name="db1.col1", source_name="db1.col1"),
        )
        self.assertListEqual(config.map_db("db1"), ["db1"])
        self.assertEqual(config.map_namespace("db1.col1"), "db1.col1")
        self.assertIsNone(config.map_namespace("db2.col4"))
def test_invalid_collection_name_validation(self):
    """Check that malformed namespace names raise InvalidConfiguration."""
    invalid_kwargs = (
        {"namespace_options": {"invalid_db": "newinvalid_db"}},
        {"namespace_set": ["invalid_db."]},
        {"ex_namespace_set": [".invalid_db"]},
        {"gridfs_set": [".invalid_db"]},
    )
    for kwargs in invalid_kwargs:
        with self.assertRaises(errors.InvalidConfiguration):
            NamespaceConfig(**kwargs)
def initOplogThread(self, namespace_set=None):
    """Create and start an OplogThread that logs commands.

    The same NamespaceConfig, built from ``namespace_set``, is shared by
    the doc manager's CommandHelper and by the OplogThread.
    """
    config = NamespaceConfig(namespace_set=namespace_set)

    command_logger = CommandLoggerDocManager()
    self.docman = command_logger
    command_logger.command_helper = CommandHelper(config)

    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(command_logger,),
        oplog_progress_dict=self.oplog_progress,
        namespace_config=config,
        collection_dump=False,
    )
    self.opman.start()
def test_dump_collection(self):
    """Test the dump_collection method

    Cases:

    1. empty oplog
    2. non-empty oplog, with gridfs collections
    3. non-empty oplog, specified a namespace-set, none of the oplog
       entries are for collections in the namespace-set

    The replica set and client created for case 3 are released through
    ``addCleanup`` so they are torn down even if an assertion fails.
    """
    # Case 1: empty oplog
    self.opman.oplog = self.primary_conn["test"]["emptycollection"]
    last_ts = self.opman.dump_collection()
    self.assertIsNone(last_ts)

    # Case 2: non-empty oplog with gridfs collections
    self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
    # Insert 10 gridfs files
    for i in range(10):
        fs = gridfs.GridFS(self.primary_conn["gridfs"],
                           collection="test" + str(i))
        fs.put(b"hello world")
    # Insert 1000 documents
    for i in range(1000):
        self.primary_conn["test"]["test"].insert_one({"i": i + 500})
    last_ts = self.opman.get_last_oplog_timestamp()
    self.assertEqual(last_ts, self.opman.dump_collection())
    # 10 gridfs files + 1000 documents
    self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010)

    # Case 3
    # 1MB oplog so that we can rollover quickly
    repl_set = ReplicaSetSingle(oplogSize=1).start()
    self.addCleanup(repl_set.stop)
    conn = repl_set.client()
    # Registered last, so the client is closed before the set is stopped.
    self.addCleanup(conn.close)
    opman = OplogThread(
        primary_client=conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_set=["test.test"]),
    )
    # Insert a document into an included collection
    conn["test"]["test"].insert_one({"test": 1})
    # Cause the oplog to rollover on a non-included collection
    while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
        conn["test"]["ignored"].insert_many(
            [{"test": "1" * 1024} for _ in range(1024)])
    last_ts = opman.get_last_oplog_timestamp()
    self.assertEqual(last_ts, opman.dump_collection())
    self.assertEqual(len(opman.doc_managers[0]._search()), 1)
def setUp(self):
    """Start a single-node replica set and build an OplogThread on it."""
    self.repl_set = ReplicaSetSingle().start()
    self.primary_conn = self.repl_set.client()
    self.oplog_coll = self.primary_conn.local["oplog.rs"]

    # Include everything under "test" and treat "gridfs.*" as GridFS.
    options = {"test.*": True, "gridfs.*": {"gridfs": True}}
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(DocManager(),),
        oplog_progress_dict=LockingDict(),
        namespace_config=NamespaceConfig(namespace_options=options),
    )
def test_exclude_plain(self):
    """Check excluding a namespace that contains no wildcards."""
    config = NamespaceConfig(ex_namespace_set=["ex.clude"])

    # Un-mapping returns the name unchanged for both namespaces.
    for namespace in ("db.col", "ex.clude"):
        self.assertEqual(config.unmap_namespace(namespace), namespace)

    # Forward mapping drops only the excluded namespace.
    self.assertEqual(config.map_namespace("db.col"), "db.col")
    self.assertIsNone(config.map_namespace("ex.clude"))
def test_include_and_exclude_validation(self):
    """Check that including and excluding one namespace is an error."""
    contradictory_kwargs = (
        {
            "ex_namespace_set": ["ex.cluded"],
            "namespace_set": ["in.cluded", "ex.cluded"],
        },
        {
            "namespace_set": ["ex.cluded"],
            "namespace_options": {"ex.cluded": False, "in.cluded": True},
        },
        {
            "ex_namespace_set": ["ex.cluded", "in.cluded"],
            "namespace_options": {"in.cluded": True},
        },
    )
    for kwargs in contradictory_kwargs:
        with self.assertRaises(errors.InvalidConfiguration):
            NamespaceConfig(**kwargs)
def test_update(self):
    """Test that Connector can replicate updates successfully."""
    doc = {"a": 1, "b": 2}
    self.conn.test.test.insert_one(doc)
    selector = {"_id": doc["_id"]}

    def update_and_retrieve(update_spec, replace=False):
        """Apply the update, wait, and return the first replicated doc."""
        if replace:
            self.conn.test.test.replace_one(selector, update_spec)
        else:
            self.conn.test.test.update_one(selector, update_spec)
        # Give the connector some time to perform update
        time.sleep(1)
        return self.synchronizer._search()[0]

    # Update whole document
    doc = update_and_retrieve({"a": 1, "b": 2, "c": 10}, replace=True)
    self.assertEqual(doc["a"], 1)
    self.assertEqual(doc["b"], 2)
    self.assertEqual(doc["c"], 10)

    # $set only
    doc = update_and_retrieve({"$set": {"b": 4}})
    self.assertEqual(doc["a"], 1)
    self.assertEqual(doc["b"], 4)

    # $unset only
    doc = update_and_retrieve({"$unset": {"a": True}})
    self.assertNotIn("a", doc)
    self.assertEqual(doc["b"], 4)

    # mixed $set/$unset
    doc = update_and_retrieve({"$unset": {"b": True}, "$set": {"c": 3}})
    self.assertEqual(doc["c"], 3)
    self.assertNotIn("b", doc)

    # ensure update works when fields are given
    opthread = self.connector.shard_set[0]
    original_config = opthread.namespace_config
    opthread.namespace_config = NamespaceConfig(
        include_fields=["a", "b", "c"])
    try:
        # "d" is written to the source but is not an included field.
        doc = update_and_retrieve({"$set": {"d": 10}})
        self.assertEqual(self.conn.test.test.find_one(doc["_id"])["d"], 10)
        self.assertNotIn("d", doc)
        doc = update_and_retrieve({"$set": {"a": 10}})
        self.assertEqual(doc["a"], 10)
    finally:
        # Put back the config that was swapped out above so the field
        # filter does not stay active on the oplog thread.
        opthread.namespace_config = original_config
def test_commands(self):
    """Test command handling through a doc manager with renamed namespaces.

    Both the 'test' database and the rename target 'dropped' are removed
    in the ``finally`` block, so a failing assertion cannot leave the
    'dropped' database behind.
    """
    # Also test with namespace mapping.
    # Note that mongo-connector does not currently support commands after
    # renaming a database.
    namespace_config = NamespaceConfig(
        namespace_set=['test.test', 'test.test2', 'test.drop'],
        namespace_options={
            'test.test': 'test.othertest',
            'test.drop': 'dropped.collection'
        })
    self.choosy_docman.command_helper = CommandHelper(namespace_config)

    try:
        self.choosy_docman.handle_command({'create': 'test'}, *TESTARGS)
        self.assertIn('othertest',
                      self.mongo_conn['test'].collection_names())

        self.choosy_docman.handle_command(
            {
                'renameCollection': 'test.test',
                'to': 'test.test2'
            },
            'admin.$cmd', 1)
        self.assertNotIn('othertest',
                         self.mongo_conn['test'].collection_names())
        self.assertIn('test2',
                      self.mongo_conn['test'].collection_names())

        self.choosy_docman.handle_command({'drop': 'test2'},
                                          'test.$cmd', 1)
        self.assertNotIn('test2',
                         self.mongo_conn['test'].collection_names())

        # WiredTiger drops the database when the last collection is
        # dropped.
        if 'test' not in self.mongo_conn.database_names():
            self.choosy_docman.handle_command({'create': 'test'},
                                              *TESTARGS)
        self.assertIn('test', self.mongo_conn.database_names())

        self.choosy_docman.handle_command({'dropDatabase': 1},
                                          'test.$cmd', 1)
        self.assertNotIn('test', self.mongo_conn.database_names())

        # Briefly test mapped database name with dropDatabase command.
        self.mongo_conn.dropped.collection.insert_one({'a': 1})
        self.assertIn('dropped', self.mongo_conn.database_names())
        self.choosy_docman.handle_command({'dropDatabase': 1},
                                          'test.$cmd', 1)
        self.assertNotIn('dropped', self.mongo_conn.database_names())
    finally:
        self.mongo_conn.drop_database('test')
        # Also remove the rename target in case the test failed before
        # the dropDatabase command above removed it.
        self.mongo_conn.drop_database('dropped')
def test_command_helper(self):
    """Test CommandHelper database, namespace and collection mapping."""
    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    # 'a.z' is included without a rename.
    helper = CommandHelper(
        NamespaceConfig(namespace_set=list(mapping) + ['a.z'],
                        namespace_options=mapping))

    # Order of the mapped databases is not asserted.
    self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertIsNone(helper.map_namespace('d.x'))

    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def test_command_helper(self):
    """Test CommandHelper database, namespace and collection mapping."""
    mapping = {"a.x": "b.x", "a.y": "c.y"}
    # "a.z" is included without a rename.
    helper = CommandHelper(
        NamespaceConfig(namespace_set=list(mapping) + ["a.z"],
                        namespace_options=mapping))

    # Order of the mapped databases is not asserted.
    self.assertEqual(set(helper.map_db("a")), {"a", "b", "c"})
    self.assertEqual(helper.map_db("d"), [])

    self.assertEqual(helper.map_namespace("a.x"), "b.x")
    self.assertEqual(helper.map_namespace("a.z"), "a.z")
    self.assertIsNone(helper.map_namespace("d.x"))

    self.assertEqual(helper.map_collection("a", "x"), ("b", "x"))
    self.assertEqual(helper.map_collection("a", "z"), ("a", "z"))
    self.assertEqual(helper.map_collection("d", "x"), (None, None))
def test_include_wildcard_periods(self):
    """Check that '.' in a namespace pattern matches only a literal '.'."""
    config = NamespaceConfig(namespace_set=["db.*"])
    # "x" in place of the period must not match.
    not_matching = config.map_namespace("dbxcol")
    self.assertIsNone(not_matching)
    matching = config.map_namespace("db.col")
    self.assertEqual(matching, "db.col")
def test_include_wildcard_multiple_periods(self):
    """Check matching a namespace that contains several '.' characters."""
    config = NamespaceConfig(namespace_set=["db.col.*"])
    # Without the trailing period the pattern does not match.
    without_trailing_period = config.map_namespace("db.col")
    self.assertIsNone(without_trailing_period)
    with_trailing_period = config.map_namespace("db.col.")
    self.assertEqual(with_trailing_period, "db.col.")
def test_include_wildcard_no_period_in_database(self):
    """Check that a wildcard in the database name cannot match a period."""
    config = NamespaceConfig(namespace_set=["db*.col"])
    # Here the wildcard would have to match ".bar", which has a period.
    with_period = config.map_namespace("db.bar.col")
    self.assertIsNone(with_period)
    without_period = config.map_namespace("dbfoo.col")
    self.assertEqual(without_period, "dbfoo.col")