def __init__(self, url, auto_commit_interval=None,
             chunk_size=constants.DEFAULT_MAX_BULK, **kwargs):
    """Store the settings, open a connection and resolve the meta database.

    Args:
        url: Stored unchanged as ``self.url``.
        auto_commit_interval: Stored unchanged as
            ``self.auto_commit_interval``.
        chunk_size: Stored unchanged as ``self.chunk_size``.
        **kwargs: Remaining keyword arguments, stored as ``self.kwargs``.
    """
    self.url = url
    self.auto_commit_interval = auto_commit_interval
    self.unique_key = '_key'
    self.chunk_size = chunk_size
    self.kwargs = kwargs
    self.arango = self.create_connection()

    # Names of the meta database and of the collection created inside it.
    self.meta_database = "mongodb_meta"
    self.meta_collection = "mongodb_data"

    # ``self.meta_database`` starts out as the name and ends up holding the
    # database object: fetched when the name is already known to the
    # server, otherwise created together with the meta collection.
    meta_db_name = self.meta_database
    if meta_db_name in self.arango.databases():
        self.meta_database = self.arango.db(meta_db_name)
    else:
        self.meta_database = self.arango.create_database(meta_db_name)
        self.meta_database.create_collection(self.meta_collection)

    self.command_helper = CommandHelper()
def test_commands(self):
    """Run create, drop and dropDatabase commands through ``elastic_doc``.

    Documents are upserted into ``test.test2`` and committed between the
    create and drop steps so the drop can be checked against real content.
    """
    cmd_args = ("test.$cmd", 1)
    self.elastic_doc.command_helper = CommandHelper()

    def refresh():
        retry_until_ok(self.elastic_conn.indices.refresh, index="")

    def search_test2():
        # A fresh query body is built for every search.
        return list(
            self.elastic_doc._stream_search(
                index="test",
                doc_type="test2",
                body={"query": {"match_all": {}}},
            )
        )

    self.elastic_doc.handle_command({"create": "test2"}, *cmd_args)
    refresh()
    self.assertIn("test2", self._mappings("test"))

    docs = [
        {"_id": doc_id, "name": name}
        for doc_id, name in enumerate(("ted", "marsha", "nikolas"))
    ]
    for doc in docs:
        self.elastic_doc.upsert(doc, "test.test2", 1)

    # Commit upserted docs as they are in buffer
    self.elastic_doc.commit()

    found = search_test2()
    for doc in docs:
        self.assertTrue(doc in found)

    self.elastic_doc.handle_command({"drop": "test2"}, *cmd_args)
    refresh()
    self.assertEqual(0, len(search_test2()))

    for collection in ("test2", "test3"):
        self.elastic_doc.handle_command({"create": collection}, *cmd_args)
    refresh()
    self.elastic_doc.handle_command({"dropDatabase": 1}, *cmd_args)
    refresh()

    self.assertNotIn("test", self._indices())
    for collection in ("test2", "test3"):
        self.assertNotIn(collection, self._mappings())
def initOplogThread(self, namespace_set=None, dest_mapping=None):
    """Create a command-logging doc manager and start an OplogThread.

    Args:
        namespace_set: Namespaces handed to both ``CommandHelper`` and
            ``OplogThread``. ``None`` (the default) means an empty list.
        dest_mapping: Namespace mapping handed to both ``CommandHelper``
            and ``OplogThread``. ``None`` (the default) means an empty
            dict.
    """
    # Fresh containers per call: mutable default arguments are created once
    # and would otherwise be shared by every call that omits them.
    namespace_set = [] if namespace_set is None else namespace_set
    dest_mapping = {} if dest_mapping is None else dest_mapping

    self.docman = CommandLoggerDocManager()
    self.docman.command_helper = CommandHelper(namespace_set, dest_mapping)
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(self.docman,),
        oplog_progress_dict=self.oplog_progress,
        namespace_set=namespace_set,
        dest_mapping=dest_mapping,
        collection_dump=False,
    )
    self.opman.start()
def initOplogThread(self, namespace_set=None):
    """Create a command-logging doc manager and start an OplogThread.

    Args:
        namespace_set: Forwarded to ``NamespaceConfig``; the resulting
            config is shared by the command helper and the oplog thread.
    """
    self.docman = CommandLoggerDocManager()
    namespace_config = NamespaceConfig(namespace_set=namespace_set)
    self.docman.command_helper = CommandHelper(namespace_config)

    thread_options = {
        "primary_client": self.primary_conn,
        "doc_managers": (self.docman,),
        "oplog_progress_dict": self.oplog_progress,
        "namespace_config": namespace_config,
        "collection_dump": False,
    }
    self.opman = OplogThread(**thread_options)
    self.opman.start()
def test_commands(self):
    """Run create, drop and dropDatabase commands through ``elastic_doc``.

    Fixed sleeps separate each command from the check that follows it.
    """
    cmd_args = ('test.$cmd', 1)
    self.elastic_doc.command_helper = CommandHelper()

    def search_test2():
        # A fresh query body is built for every search.
        return list(
            self.elastic_doc._stream_search(
                index="test",
                doc_type='test2',
                body={"query": {"match_all": {}}},
            )
        )

    self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
    time.sleep(1)
    self.assertIn('test2', self._mappings('test'))

    docs = [
        {"_id": doc_id, "name": name}
        for doc_id, name in enumerate(("ted", "marsha", "nikolas"))
    ]
    for doc in docs:
        self.elastic_doc.upsert(doc, 'test.test2', 1)

    found = search_test2()
    for doc in docs:
        self.assertTrue(doc in found)

    self.elastic_doc.handle_command({'drop': 'test2'}, *cmd_args)
    time.sleep(3)
    self.assertEqual(0, len(search_test2()))

    for collection in ('test2', 'test3'):
        self.elastic_doc.handle_command({'create': collection}, *cmd_args)
    time.sleep(1)
    self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
    time.sleep(1)

    self.assertNotIn('test', self._indices())
    for collection in ('test2', 'test3'):
        self.assertNotIn(collection, self._mappings())
def test_commands(self):
    """Check command handling on ``choosy_docman`` with namespace mapping.

    Note that mongo-connector does not currently support commands after
    renaming a database.
    """
    included = ['test.test', 'test.test2', 'test.drop']
    renamed = {
        'test.test': 'test.othertest',
        'test.drop': 'dropped.collection',
    }
    namespace_config = NamespaceConfig(namespace_set=included,
                                       namespace_options=renamed)
    self.choosy_docman.command_helper = CommandHelper(namespace_config)

    def send(command, *args):
        self.choosy_docman.handle_command(command, *args)

    def collections_in_test():
        return self.mongo_conn['test'].collection_names()

    def databases():
        return self.mongo_conn.database_names()

    try:
        send({'create': 'test'}, *TESTARGS)
        self.assertIn('othertest', collections_in_test())

        send({'renameCollection': 'test.test', 'to': 'test.test2'},
             'admin.$cmd', 1)
        self.assertNotIn('othertest', collections_in_test())
        self.assertIn('test2', collections_in_test())

        send({'drop': 'test2'}, 'test.$cmd', 1)
        self.assertNotIn('test2', collections_in_test())

        # WiredTiger drops the database when the last collection is
        # dropped.
        if 'test' not in databases():
            send({'create': 'test'}, *TESTARGS)
        self.assertIn('test', databases())

        send({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('test', databases())

        # Briefly test mapped database name with dropDatabase command.
        self.mongo_conn.dropped.collection.insert_one({'a': 1})
        self.assertIn('dropped', databases())
        send({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('dropped', databases())
    finally:
        self.mongo_conn.drop_database('test')
def test_command_helper(self):
    """Check CommandHelper's database, namespace and collection mapping."""
    mapping = {"a.x": "b.x", "a.y": "c.y"}
    config = NamespaceConfig(
        namespace_set=list(mapping) + ["a.z"],
        namespace_options=mapping,
    )
    helper = CommandHelper(config)

    # Database "a" maps to itself and to both target databases.
    self.assertEqual(set(helper.map_db("a")), {"a", "b", "c"})
    self.assertEqual(helper.map_db("d"), [])

    namespace_cases = (
        ("a.x", "b.x"),
        ("a.z", "a.z"),
        ("d.x", None),
    )
    for namespace, expected in namespace_cases:
        self.assertEqual(helper.map_namespace(namespace), expected)

    collection_cases = (
        (("a", "x"), ("b", "x")),
        (("a", "z"), ("a", "z")),
        (("d", "x"), (None, None)),
    )
    for (db, coll), expected in collection_cases:
        self.assertEqual(helper.map_collection(db, coll), expected)
def initOplogThread(self, namespace_set=None, ex_namespace_set=None,
                    dest_mapping=None):
    """Create a command-logging doc manager and start an OplogThread.

    Args:
        namespace_set: Passed to ``DestMapping`` and, as ``ns_set``, to
            ``OplogThread``. ``None`` (the default) means an empty list.
        ex_namespace_set: Passed to ``DestMapping`` and, as ``ex_ns_set``,
            to ``OplogThread``. ``None`` (the default) means an empty
            list.
        dest_mapping: Passed to ``DestMapping``. ``None`` (the default)
            means an empty dict.
    """
    # Fresh containers per call: mutable default arguments are created once
    # and would otherwise be shared by every call that omits them.
    namespace_set = [] if namespace_set is None else namespace_set
    ex_namespace_set = [] if ex_namespace_set is None else ex_namespace_set
    dest_mapping = {} if dest_mapping is None else dest_mapping

    self.docman = CommandLoggerDocManager()
    # Replace the origin dest_mapping
    self.dest_mapping_stru = DestMapping(namespace_set, ex_namespace_set,
                                         dest_mapping)
    self.docman.command_helper = CommandHelper(self.dest_mapping_stru)
    self.opman = OplogThread(
        primary_client=self.primary_conn,
        doc_managers=(self.docman,),
        oplog_progress_dict=self.oplog_progress,
        dest_mapping_stru=self.dest_mapping_stru,
        ns_set=namespace_set,
        ex_ns_set=ex_namespace_set,
        collection_dump=False,
    )
    self.opman.start()
def test_command_helper(self):
    """Check CommandHelper's database, namespace and collection mapping."""
    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    namespaces = list(mapping) + ['a.z']
    helper = CommandHelper(
        NamespaceConfig(namespace_set=namespaces, namespace_options=mapping))

    # Database 'a' maps to itself and to both target databases.
    mapped_dbs = helper.map_db('a')
    self.assertEqual(set(mapped_dbs), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    for namespace, expected in [('a.x', 'b.x'),
                                ('a.z', 'a.z'),
                                ('d.x', None)]:
        self.assertEqual(helper.map_namespace(namespace), expected)

    for source, expected in [(('a', 'x'), ('b', 'x')),
                             (('a', 'z'), ('a', 'z')),
                             (('d', 'x'), (None, None))]:
        self.assertEqual(helper.map_collection(*source), expected)
def test_command_helper(self):
    """Check CommandHelper's mapping when built from a DestMapping."""
    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    included = list(mapping) + ['a.z']
    excluded = []
    # Replace the origin dest_mapping
    helper = CommandHelper(DestMapping(included, excluded, mapping))

    # Database 'a' maps to itself and to both target databases.
    self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    expected_namespaces = (
        ('a.x', 'b.x'),
        ('a.z', 'a.z'),
        ('d.x', None),
    )
    for namespace, target in expected_namespaces:
        self.assertEqual(helper.map_namespace(namespace), target)

    expected_collections = (
        ('a', 'x', ('b', 'x')),
        ('a', 'z', ('a', 'z')),
        ('d', 'x', (None, None)),
    )
    for db, coll, target in expected_collections:
        self.assertEqual(helper.map_collection(db, coll), target)
def test_commands(self):
    """Check that drop and dropDatabase remove documents of a namespace."""
    self.docman.command_helper = CommandHelper()

    def count_ns(ns):
        # Number of hits returned by a search on the ``ns`` field.
        return len(list(self._search("ns:%s" % ns)))

    # Drop a single collection.
    self.docman.upsert({'_id': '1', 'test': 'data'}, *TESTARGS)
    self.assertEqual(count_ns("test.test"), 1)

    self.docman.handle_command({'drop': 'test'}, *TESTARGS)
    time.sleep(1)
    self.assertEqual(count_ns("test.test"), 0)

    # Drop a whole database holding two collections.
    self.docman.upsert({'_id': '2', 'test': 'data'}, 'test.test2', '2')
    self.docman.upsert({'_id': '3', 'test': 'data'}, 'test.test3', '3')

    self.docman.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
    time.sleep(1)
    for namespace in ("test.test2", "test.test3"):
        self.assertEqual(count_ns(namespace), 0)
def buffer_and_drop(self):
    """Insert document and drop collection while doc is in buffer.

    ``auto_commit_interval`` is set to ``None`` for the duration of the
    check and reset to 0 afterwards, including when an assertion fails, so
    the changed setting does not carry over to other tests.
    """
    self.elastic_doc.command_helper = CommandHelper()
    self.elastic_doc.auto_commit_interval = None
    try:
        index = "test3"
        doc_type = "foo"
        cmd_args = ('%s.%s' % (index, doc_type), 1)
        doc_id = 1
        doc = {"_id": doc_id, "name": "bar"}

        self.elastic_doc.upsert(doc, *cmd_args)
        self.elastic_doc.handle_command({'drop': doc_type}, *cmd_args)
        retry_until_ok(self.elastic_conn.indices.refresh, index="")

        # Commit should be called before command has been handled
        # Which means that buffer should be empty
        self.assertFalse(self.elastic_doc.BulkBuffer.get_buffer())

        # After drop, below search should return no results
        res = list(
            self.elastic_doc._stream_search(
                index=index,
                doc_type=doc_type,
                body={"query": {"match_all": {}}},
            )
        )
        self.assertFalse(res)

        # Test dropDatabase as well
        # Firstly add document to database again
        # This time update doc as well
        self.elastic_doc.upsert(doc, *cmd_args)
        update_spec = {"$set": {"name": "foo2"}}
        self.elastic_doc.update(doc_id, update_spec, *cmd_args)
        self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
        retry_until_ok(self.elastic_conn.indices.refresh, index="")

        self.assertFalse(self.elastic_doc.BulkBuffer.get_buffer())
        self.assertNotIn(index, self._mappings())
    finally:
        # set auto_commit_interval back to 0
        self.elastic_doc.auto_commit_interval = 0
def test_command_helper(self):
    """Check CommandHelper's merge rejection and its mapping lookups."""
    # Databases cannot be merged
    merging = {'a.x': 'c.x', 'b.x': 'c.y'}
    with self.assertRaises(errors.MongoConnectorError):
        CommandHelper(list(merging), merging)

    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    helper = CommandHelper(list(mapping) + ['a.z'], mapping)

    # Database 'a' maps to itself and to both target databases.
    self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    for namespace, expected in (('a.x', 'b.x'),
                                ('a.z', 'a.z'),
                                ('d.x', None)):
        self.assertEqual(helper.map_namespace(namespace), expected)

    for (db, coll), expected in ((('a', 'x'), ('b', 'x')),
                                 (('a', 'z'), ('a', 'z')),
                                 (('d', 'x'), (None, None))):
        self.assertEqual(helper.map_collection(db, coll), expected)
def test_commands(self):
    """Run create, rename, drop and dropDatabase through ``mongo_doc``."""
    self.mongo_doc.command_helper = CommandHelper()

    def send(command, *args):
        self.mongo_doc.handle_command(command, *args)

    def collections_in_test():
        return self.mongo_conn['test'].collection_names()

    # Create the collection and check that it appears.
    send({'create': 'test'}, *TESTARGS)
    self.assertIn('test', collections_in_test())

    # Rename it; only the new name may remain.
    send({'renameCollection': 'test.test', 'to': 'test.test2'},
         'admin.$cmd', 1)
    self.assertNotIn('test', collections_in_test())
    self.assertIn('test2', collections_in_test())

    # Drop the renamed collection.
    send({'drop': 'test2'}, 'test.$cmd', 1)
    self.assertNotIn('test2', collections_in_test())

    # The database is expected to still exist until it is dropped too.
    self.assertIn('test', self.mongo_conn.database_names())
    send({'dropDatabase': 1}, 'test.$cmd', 1)
    self.assertNotIn('test', self.mongo_conn.database_names())
def test_commands(self):
    """Run create, drop and dropDatabase commands through ``elastic_doc``.

    Each command is followed by a one-second sleep before the mappings or
    indices are inspected.
    """
    cmd_args = ('test.$cmd', 1)
    self.elastic_doc.command_helper = CommandHelper()

    def send_and_wait(command):
        self.elastic_doc.handle_command(command, *cmd_args)
        time.sleep(1)

    send_and_wait({'create': 'test2'})
    self.assertIn('test2', self._mappings('test'))

    send_and_wait({'drop': 'test2'})
    self.assertNotIn('test2', self._mappings('test'))

    # Two creates back to back, then a single wait.
    self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
    send_and_wait({'create': 'test3'})

    send_and_wait({'dropDatabase': 1})
    self.assertNotIn('test', self._indices())
    for mapping_name in ('test2', 'test3'):
        self.assertNotIn(mapping_name, self._mappings())
def test_commands(self):
    """Check command handling on ``choosy_docman`` with namespace mapping.

    Note that mongo-connector does not currently support commands after
    renaming a database.
    """
    namespace_config = NamespaceConfig(
        namespace_set=["test.test", "test.test2", "test.drop"],
        namespace_options={
            "test.test": "test.othertest",
            "test.drop": "dropped.collection",
        },
    )
    docman = self.choosy_docman
    docman.command_helper = CommandHelper(namespace_config)
    conn = self.mongo_conn

    try:
        # create: the collection shows up under its mapped name.
        docman.handle_command({"create": "test"}, *TESTARGS)
        self.assertIn("othertest", conn["test"].collection_names())

        # renameCollection: the mapped name is replaced by the new one.
        rename = {"renameCollection": "test.test", "to": "test.test2"}
        docman.handle_command(rename, "admin.$cmd", 1)
        names_after_rename = conn["test"].collection_names()
        self.assertNotIn("othertest", names_after_rename)
        names_after_rename = conn["test"].collection_names()
        self.assertIn("test2", names_after_rename)

        # drop: the renamed collection disappears.
        docman.handle_command({"drop": "test2"}, "test.$cmd", 1)
        self.assertNotIn("test2", conn["test"].collection_names())

        # WiredTiger drops the database when the last collection is
        # dropped.
        database_missing = "test" not in conn.database_names()
        if database_missing:
            docman.handle_command({"create": "test"}, *TESTARGS)
        self.assertIn("test", conn.database_names())

        docman.handle_command({"dropDatabase": 1}, "test.$cmd", 1)
        self.assertNotIn("test", conn.database_names())

        # Briefly test mapped database name with dropDatabase command.
        conn.dropped.collection.insert_one({"a": 1})
        self.assertIn("dropped", conn.database_names())
        docman.handle_command({"dropDatabase": 1}, "test.$cmd", 1)
        self.assertNotIn("dropped", conn.database_names())
    finally:
        conn.drop_database("test")
def test_commands(self):
    """Check command handling on ``mongo_doc`` with namespace mapping.

    Note that mongo-connector does not currently support commands after
    renaming a database.
    """
    included = ['test.test', 'test.test2', 'test.drop']
    renamed = {
        'test.test': 'test.othertest',
        'test.drop': 'dropped.collection',
    }
    self.mongo_doc.command_helper = CommandHelper(namespace_set=included,
                                                  dest_mapping=renamed)

    def send(command, *args):
        self.mongo_doc.handle_command(command, *args)

    def collections_in_test():
        return self.mongo_conn['test'].collection_names()

    def databases():
        return self.mongo_conn.database_names()

    try:
        send({'create': 'test'}, *TESTARGS)
        self.assertIn('othertest', collections_in_test())

        send({'renameCollection': 'test.test', 'to': 'test.test2'},
             'admin.$cmd', 1)
        self.assertNotIn('othertest', collections_in_test())
        self.assertIn('test2', collections_in_test())

        send({'drop': 'test2'}, 'test.$cmd', 1)
        self.assertNotIn('test2', collections_in_test())

        self.assertIn('test', databases())
        send({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('test', databases())

        # Briefly test mapped database name with dropDatabase command.
        # NOTE(review): Collection.insert is the legacy PyMongo API
        # (deprecated in 3.0) - confirm the targeted PyMongo version
        # before switching to insert_one.
        self.mongo_conn.dropped.collection.insert({'a': 1})
        self.assertIn('dropped', databases())
        send({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('dropped', databases())
    finally:
        self.mongo_conn.drop_database('test')
def __init__(self, mongo_address, doc_managers=None, **kwargs):
    """Set up connector state, the command helper and the progress file.

    Args:
        mongo_address: Main address - either mongos for sharded setups or
            a primary otherwise.
        doc_managers: DocManager instances; when falsy, a single simulator
            DocManager is used and a warning is logged.
        **kwargs: Options. ``auth_key``, ``auth_username``,
            ``oplog_checkpoint`` and the four ``ssl_*`` options are popped;
            everything else is kept in ``self.kwargs``.

    Exits the process with status 2 when the progress file cannot be
    created, or exists but is neither writable nor readable.
    """
    super(Connector, self).__init__()

    # can_run is set to false when we join the thread
    self.can_run = True
    # The signal that caused the connector to stop or None
    self.signal = None
    # Main address and the (not yet opened) connection to it
    self.address = mongo_address
    self.main_conn = None

    # List of DocManager instances
    if not doc_managers:
        LOG.warning('No doc managers specified, using simulator.')
        doc_managers = (simulator.DocManager(),)
    self.doc_managers = doc_managers

    # Password and username for authentication
    self.auth_key = kwargs.pop('auth_key', None)
    self.auth_username = kwargs.pop('auth_username', None)

    # The name of the file that stores the progress of the OplogThreads
    self.oplog_checkpoint = kwargs.pop('oplog_checkpoint',
                                       'oplog.timestamp')

    # The set of OplogThreads created
    self.shard_set = {}
    # Dict of OplogThread/timestamp pairs to record progress
    self.oplog_progress = LockingDict()

    # Timezone awareness (read, not popped, so it stays in self.kwargs)
    self.tz_aware = kwargs.get('tz_aware', False)

    # SSL keyword arguments to MongoClient: only truthy values are kept.
    self.ssl_kwargs = {}
    for option in ('ssl_certfile', 'ssl_ca_certs',
                   'ssl_keyfile', 'ssl_cert_reqs'):
        value = kwargs.pop(option, None)
        if value:
            self.ssl_kwargs[option] = value

    # Save the rest of kwargs.
    self.kwargs = kwargs

    # Replace the origin dest_mapping
    self.dest_mapping = DestMapping(kwargs.get('ns_set', []),
                                    kwargs.get('ex_ns_set', []),
                                    kwargs.get('dest_mapping', {}))

    # Initialize and set the command helper
    command_helper = CommandHelper(self.dest_mapping)
    for dm in self.doc_managers:
        dm.command_helper = command_helper

    checkpoint = self.oplog_checkpoint
    if checkpoint is None:
        return

    if not os.path.exists(checkpoint):
        info_str = ("MongoConnector: Can't find %s, "
                    "attempting to create an empty progress log" %
                    checkpoint)
        LOG.warning(info_str)
        try:
            # Create oplog progress file
            open(checkpoint, "w").close()
        except IOError as e:
            LOG.critical("MongoConnector: Could not "
                         "create a progress log: %s" % str(e))
            sys.exit(2)
    elif not (os.access(checkpoint, os.W_OK)
              or os.access(checkpoint, os.R_OK)):
        # Rejected only when the file is neither writable nor readable.
        LOG.critical("Invalid permissions on %s! Exiting" % (checkpoint))
        sys.exit(2)