def test_command_helper(self):
    # Databases cannot be merged
    mapping = {'a.x': 'c.x', 'b.x': 'c.y'}
    self.assertRaises(errors.MongoConnectorError,
                      CommandHelper, list(mapping), mapping)

    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    helper = CommandHelper(list(mapping) + ['a.z'], mapping)

    self.assertEqual(set(helper.map_db('a')), set(['a', 'b', 'c']))
    self.assertEqual(helper.map_db('d'), [])

    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def test_commands(self):
    cmd_args = ("test.$cmd", 1)
    self.elastic_doc.command_helper = CommandHelper()

    self.elastic_doc.handle_command({"create": "test2"}, *cmd_args)
    retry_until_ok(self.elastic_conn.indices.refresh, index="")
    self.assertIn("test2", self._mappings("test"))

    docs = [
        {"_id": 0, "name": "ted"},
        {"_id": 1, "name": "marsha"},
        {"_id": 2, "name": "nikolas"},
    ]
    self.elastic_doc.upsert(docs[0], "test.test2", 1)
    self.elastic_doc.upsert(docs[1], "test.test2", 1)
    self.elastic_doc.upsert(docs[2], "test.test2", 1)
    # Commit upserted docs as they are in buffer
    self.elastic_doc.commit()
    res = list(
        self.elastic_doc._stream_search(
            index="test", doc_type="test2", body={"query": {"match_all": {}}}
        )
    )
    for d in docs:
        self.assertTrue(d in res)

    self.elastic_doc.handle_command({"drop": "test2"}, *cmd_args)
    retry_until_ok(self.elastic_conn.indices.refresh, index="")
    res = list(
        self.elastic_doc._stream_search(
            index="test", doc_type="test2", body={"query": {"match_all": {}}}
        )
    )
    self.assertEqual(0, len(res))

    self.elastic_doc.handle_command({"create": "test2"}, *cmd_args)
    self.elastic_doc.handle_command({"create": "test3"}, *cmd_args)
    retry_until_ok(self.elastic_conn.indices.refresh, index="")
    self.elastic_doc.handle_command({"dropDatabase": 1}, *cmd_args)
    retry_until_ok(self.elastic_conn.indices.refresh, index="")
    self.assertNotIn("test", self._indices())
    self.assertNotIn("test2", self._mappings())
    self.assertNotIn("test3", self._mappings())
def initOplogThread(self, namespace_set=[], dest_mapping={}):
    self.docman = CommandLoggerDocManager()
    self.docman.command_helper = CommandHelper(namespace_set, dest_mapping)
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(self.docman,),
                             oplog_progress_dict=self.oplog_progress,
                             namespace_set=namespace_set,
                             dest_mapping=dest_mapping,
                             collection_dump=False)
    self.opman.start()
def initOplogThread(self, namespace_set=None):
    self.docman = CommandLoggerDocManager()
    namespace_config = NamespaceConfig(namespace_set=namespace_set)
    self.docman.command_helper = CommandHelper(namespace_config)
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(self.docman,),
                             oplog_progress_dict=self.oplog_progress,
                             namespace_config=namespace_config,
                             collection_dump=False)
    self.opman.start()
def test_commands(self):
    cmd_args = ('test.$cmd', 1)
    self.elastic_doc.command_helper = CommandHelper()

    self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
    time.sleep(1)
    self.assertIn('test2', self._mappings('test'))

    docs = [
        {"_id": 0, "name": "ted"},
        {"_id": 1, "name": "marsha"},
        {"_id": 2, "name": "nikolas"}
    ]
    self.elastic_doc.upsert(docs[0], 'test.test2', 1)
    self.elastic_doc.upsert(docs[1], 'test.test2', 1)
    self.elastic_doc.upsert(docs[2], 'test.test2', 1)
    res = list(self.elastic_doc._stream_search(
        index="test", doc_type='test2',
        body={"query": {"match_all": {}}}))
    for d in docs:
        self.assertTrue(d in res)

    self.elastic_doc.handle_command({'drop': 'test2'}, *cmd_args)
    time.sleep(3)
    res = list(self.elastic_doc._stream_search(
        index="test", doc_type='test2',
        body={"query": {"match_all": {}}}))
    self.assertEqual(0, len(res))

    self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
    self.elastic_doc.handle_command({'create': 'test3'}, *cmd_args)
    time.sleep(1)
    self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
    time.sleep(1)
    self.assertNotIn('test', self._indices())
    self.assertNotIn('test2', self._mappings())
    self.assertNotIn('test3', self._mappings())
def test_commands(self):
    # Also test with namespace mapping.
    # Note that mongo-connector does not currently support commands after
    # renaming a database.
    namespace_config = NamespaceConfig(
        namespace_set=['test.test', 'test.test2', 'test.drop'],
        namespace_options={'test.test': 'test.othertest',
                           'test.drop': 'dropped.collection'})
    self.choosy_docman.command_helper = CommandHelper(namespace_config)
    try:
        self.choosy_docman.handle_command({'create': 'test'}, *TESTARGS)
        self.assertIn('othertest',
                      self.mongo_conn['test'].collection_names())

        self.choosy_docman.handle_command(
            {'renameCollection': 'test.test', 'to': 'test.test2'},
            'admin.$cmd', 1)
        self.assertNotIn('othertest',
                         self.mongo_conn['test'].collection_names())
        self.assertIn('test2', self.mongo_conn['test'].collection_names())

        self.choosy_docman.handle_command({'drop': 'test2'}, 'test.$cmd', 1)
        self.assertNotIn('test2',
                         self.mongo_conn['test'].collection_names())

        # WiredTiger drops the database when the last collection is
        # dropped.
        if 'test' not in self.mongo_conn.database_names():
            self.choosy_docman.handle_command({'create': 'test'}, *TESTARGS)
        self.assertIn('test', self.mongo_conn.database_names())
        self.choosy_docman.handle_command({'dropDatabase': 1},
                                          'test.$cmd', 1)
        self.assertNotIn('test', self.mongo_conn.database_names())

        # Briefly test mapped database name with dropDatabase command.
        self.mongo_conn.dropped.collection.insert_one({'a': 1})
        self.assertIn('dropped', self.mongo_conn.database_names())
        self.choosy_docman.handle_command({'dropDatabase': 1},
                                          'test.$cmd', 1)
        self.assertNotIn('dropped', self.mongo_conn.database_names())
    finally:
        self.mongo_conn.drop_database('test')
def test_command_helper(self):
    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    helper = CommandHelper(
        NamespaceConfig(namespace_set=list(mapping) + ['a.z'],
                        namespace_options=mapping))

    self.assertEqual(set(helper.map_db('a')), set(['a', 'b', 'c']))
    self.assertEqual(helper.map_db('d'), [])

    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def test_command_helper(self):
    mapping = {"a.x": "b.x", "a.y": "c.y"}
    helper = CommandHelper(
        NamespaceConfig(namespace_set=list(mapping) + ["a.z"],
                        namespace_options=mapping))

    self.assertEqual(set(helper.map_db("a")), set(["a", "b", "c"]))
    self.assertEqual(helper.map_db("d"), [])

    self.assertEqual(helper.map_namespace("a.x"), "b.x")
    self.assertEqual(helper.map_namespace("a.z"), "a.z")
    self.assertEqual(helper.map_namespace("d.x"), None)

    self.assertEqual(helper.map_collection("a", "x"), ("b", "x"))
    self.assertEqual(helper.map_collection("a", "z"), ("a", "z"))
    self.assertEqual(helper.map_collection("d", "x"), (None, None))
def test_command_helper(self):
    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    # Replace the original dest_mapping
    dest_mapping_stru = DestMapping(list(mapping) + ['a.z'], [], mapping)
    helper = CommandHelper(dest_mapping_stru)

    self.assertEqual(set(helper.map_db('a')), set(['a', 'b', 'c']))
    self.assertEqual(helper.map_db('d'), [])

    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def test_command_helper(self):
    # Databases cannot be merged
    mapping = {'a.x': 'c.x', 'b.x': 'c.y'}
    self.assertRaises(errors.MongoConnectorError,
                      CommandHelper, list(mapping), mapping)

    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    helper = CommandHelper(list(mapping) + ['a.z'], mapping)

    self.assertEqual(set(helper.map_db('a')), set(['a', 'b', 'c']))
    self.assertEqual(helper.map_db('d'), [])

    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def initOplogThread(self, namespace_set=[], ex_namespace_set=[],
                    dest_mapping={}):
    self.docman = CommandLoggerDocManager()
    # Replace the original dest_mapping
    self.dest_mapping_stru = DestMapping(namespace_set,
                                         ex_namespace_set,
                                         dest_mapping)
    self.docman.command_helper = CommandHelper(self.dest_mapping_stru)
    self.opman = OplogThread(primary_client=self.primary_conn,
                             doc_managers=(self.docman,),
                             oplog_progress_dict=self.oplog_progress,
                             dest_mapping_stru=self.dest_mapping_stru,
                             ns_set=namespace_set,
                             ex_ns_set=ex_namespace_set,
                             collection_dump=False)
    self.opman.start()
def test_commands(self):
    self.docman.command_helper = CommandHelper()

    def count_ns(ns):
        return sum(1 for _ in self._search("ns:%s" % ns))

    self.docman.upsert({'_id': '1', 'test': 'data'}, *TESTARGS)
    self.assertEqual(count_ns("test.test"), 1)

    self.docman.handle_command({'drop': 'test'}, *TESTARGS)
    time.sleep(1)
    self.assertEqual(count_ns("test.test"), 0)

    self.docman.upsert({'_id': '2', 'test': 'data'}, 'test.test2', '2')
    self.docman.upsert({'_id': '3', 'test': 'data'}, 'test.test3', '3')
    self.docman.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
    time.sleep(1)
    self.assertEqual(count_ns("test.test2"), 0)
    self.assertEqual(count_ns("test.test3"), 0)
def buffer_and_drop(self):
    """Insert document and drop collection while doc is in buffer"""
    self.elastic_doc.command_helper = CommandHelper()
    self.elastic_doc.auto_commit_interval = None
    index = "test3"
    doc_type = "foo"
    cmd_args = ('%s.%s' % (index, doc_type), 1)
    doc_id = 1
    doc = {"_id": doc_id, "name": "bar"}

    self.elastic_doc.upsert(doc, *cmd_args)
    self.elastic_doc.handle_command({'drop': doc_type}, *cmd_args)
    retry_until_ok(self.elastic_conn.indices.refresh, index="")

    # Commit should be called before the command is handled,
    # which means that the buffer should be empty.
    self.assertFalse(self.elastic_doc.BulkBuffer.get_buffer())

    # After the drop, the search below should return no results.
    res = list(self.elastic_doc._stream_search(
        index=index, doc_type=doc_type,
        body={"query": {"match_all": {}}}))
    self.assertFalse(res)

    # Test dropDatabase as well.
    # First add the document to the database again, this time
    # updating it as well.
    self.elastic_doc.upsert(doc, *cmd_args)
    update_spec = {"$set": {"name": "foo2"}}
    self.elastic_doc.update(doc_id, update_spec, *cmd_args)
    self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
    retry_until_ok(self.elastic_conn.indices.refresh, index="")
    self.assertFalse(self.elastic_doc.BulkBuffer.get_buffer())
    self.assertNotIn(index, self._mappings())

    # Set auto_commit_interval back to 0.
    self.elastic_doc.auto_commit_interval = 0
def test_commands(self):
    self.mongo_doc.command_helper = CommandHelper()

    # Create a test collection and assert that it exists.
    self.mongo_doc.handle_command({'create': 'test'}, *TESTARGS)
    self.assertIn('test', self.mongo_conn['test'].collection_names())

    self.mongo_doc.handle_command(
        {'renameCollection': 'test.test', 'to': 'test.test2'},
        'admin.$cmd', 1)
    self.assertNotIn('test', self.mongo_conn['test'].collection_names())
    self.assertIn('test2', self.mongo_conn['test'].collection_names())

    self.mongo_doc.handle_command({'drop': 'test2'}, 'test.$cmd', 1)
    self.assertNotIn('test2', self.mongo_conn['test'].collection_names())

    self.assertIn('test', self.mongo_conn.database_names())
    self.mongo_doc.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
    self.assertNotIn('test', self.mongo_conn.database_names())
def test_commands(self):
    cmd_args = ('test.$cmd', 1)
    self.elastic_doc.command_helper = CommandHelper()

    self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
    time.sleep(1)
    self.assertIn('test2', self._mappings('test'))

    self.elastic_doc.handle_command({'drop': 'test2'}, *cmd_args)
    time.sleep(1)
    self.assertNotIn('test2', self._mappings('test'))

    self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
    self.elastic_doc.handle_command({'create': 'test3'}, *cmd_args)
    time.sleep(1)
    self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
    time.sleep(1)
    self.assertNotIn('test', self._indices())
    self.assertNotIn('test2', self._mappings())
    self.assertNotIn('test3', self._mappings())
def test_commands(self):
    # Also test with namespace mapping.
    # Note that mongo-connector does not currently support commands after
    # renaming a database.
    namespace_config = NamespaceConfig(
        namespace_set=["test.test", "test.test2", "test.drop"],
        namespace_options={
            "test.test": "test.othertest",
            "test.drop": "dropped.collection",
        },
    )
    self.choosy_docman.command_helper = CommandHelper(namespace_config)
    try:
        self.choosy_docman.handle_command({"create": "test"}, *TESTARGS)
        self.assertIn("othertest", self.mongo_conn["test"].collection_names())

        self.choosy_docman.handle_command(
            {"renameCollection": "test.test", "to": "test.test2"}, "admin.$cmd", 1
        )
        self.assertNotIn("othertest", self.mongo_conn["test"].collection_names())
        self.assertIn("test2", self.mongo_conn["test"].collection_names())

        self.choosy_docman.handle_command({"drop": "test2"}, "test.$cmd", 1)
        self.assertNotIn("test2", self.mongo_conn["test"].collection_names())

        # WiredTiger drops the database when the last collection is
        # dropped.
        if "test" not in self.mongo_conn.database_names():
            self.choosy_docman.handle_command({"create": "test"}, *TESTARGS)
        self.assertIn("test", self.mongo_conn.database_names())
        self.choosy_docman.handle_command({"dropDatabase": 1}, "test.$cmd", 1)
        self.assertNotIn("test", self.mongo_conn.database_names())

        # Briefly test mapped database name with dropDatabase command.
        self.mongo_conn.dropped.collection.insert_one({"a": 1})
        self.assertIn("dropped", self.mongo_conn.database_names())
        self.choosy_docman.handle_command({"dropDatabase": 1}, "test.$cmd", 1)
        self.assertNotIn("dropped", self.mongo_conn.database_names())
    finally:
        self.mongo_conn.drop_database("test")
def test_command_helper(self):
    mapping = {"a.x": "b.x", "a.y": "c.y"}
    helper = CommandHelper(
        NamespaceConfig(
            namespace_set=list(mapping) + ["a.z"], namespace_options=mapping
        )
    )

    self.assertEqual(set(helper.map_db("a")), set(["a", "b", "c"]))
    self.assertEqual(helper.map_db("d"), [])

    self.assertEqual(helper.map_namespace("a.x"), "b.x")
    self.assertEqual(helper.map_namespace("a.z"), "a.z")
    self.assertEqual(helper.map_namespace("d.x"), None)

    self.assertEqual(helper.map_collection("a", "x"), ("b", "x"))
    self.assertEqual(helper.map_collection("a", "z"), ("a", "z"))
    self.assertEqual(helper.map_collection("d", "x"), (None, None))
def test_command_helper(self):
    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    helper = CommandHelper(NamespaceConfig(
        namespace_set=list(mapping) + ['a.z'],
        namespace_options=mapping))

    self.assertEqual(set(helper.map_db('a')), set(['a', 'b', 'c']))
    self.assertEqual(helper.map_db('d'), [])

    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def test_commands(self):
    # Also test with namespace mapping.
    # Note that mongo-connector does not currently support commands after
    # renaming a database.
    self.mongo_doc.command_helper = CommandHelper(
        namespace_set=['test.test', 'test.test2', 'test.drop'],
        dest_mapping={'test.test': 'test.othertest',
                      'test.drop': 'dropped.collection'})
    try:
        self.mongo_doc.handle_command({'create': 'test'}, *TESTARGS)
        self.assertIn('othertest',
                      self.mongo_conn['test'].collection_names())

        self.mongo_doc.handle_command(
            {'renameCollection': 'test.test', 'to': 'test.test2'},
            'admin.$cmd', 1)
        self.assertNotIn('othertest',
                         self.mongo_conn['test'].collection_names())
        self.assertIn('test2', self.mongo_conn['test'].collection_names())

        self.mongo_doc.handle_command({'drop': 'test2'}, 'test.$cmd', 1)
        self.assertNotIn('test2',
                         self.mongo_conn['test'].collection_names())

        self.assertIn('test', self.mongo_conn.database_names())
        self.mongo_doc.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('test', self.mongo_conn.database_names())

        # Briefly test mapped database name with dropDatabase command.
        self.mongo_conn.dropped.collection.insert({'a': 1})
        self.assertIn('dropped', self.mongo_conn.database_names())
        self.mongo_doc.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('dropped', self.mongo_conn.database_names())
    finally:
        self.mongo_conn.drop_database('test')
def test_command_helper(self):
    mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
    # Replace the original dest_mapping
    dest_mapping_stru = DestMapping(list(mapping) + ['a.z'], [], mapping)
    helper = CommandHelper(dest_mapping_stru)

    self.assertEqual(set(helper.map_db('a')), set(['a', 'b', 'c']))
    self.assertEqual(helper.map_db('d'), [])

    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def __init__(self, mongo_address, doc_managers=None, **kwargs):
    super(Connector, self).__init__()

    # can_run is set to false when we join the thread
    self.can_run = True

    # The signal that caused the connector to stop, or None
    self.signal = None

    # main address - either mongos for sharded setups or a primary otherwise
    self.address = mongo_address

    # connection to the main address
    self.main_conn = None

    # List of DocManager instances
    if doc_managers:
        self.doc_managers = doc_managers
    else:
        LOG.warning('No doc managers specified, using simulator.')
        self.doc_managers = (simulator.DocManager(),)

    # Password for authentication
    self.auth_key = kwargs.pop('auth_key', None)

    # Username for authentication
    self.auth_username = kwargs.pop('auth_username', None)

    # The name of the file that stores the progress of the OplogThreads
    self.oplog_checkpoint = kwargs.pop('oplog_checkpoint',
                                       'oplog.timestamp')

    # The set of OplogThreads created
    self.shard_set = {}

    # Dict of OplogThread/timestamp pairs to record progress
    self.oplog_progress = LockingDict()

    # Timezone awareness
    self.tz_aware = kwargs.get('tz_aware', False)

    # SSL keyword arguments to MongoClient.
    ssl_certfile = kwargs.pop('ssl_certfile', None)
    ssl_ca_certs = kwargs.pop('ssl_ca_certs', None)
    ssl_keyfile = kwargs.pop('ssl_keyfile', None)
    ssl_cert_reqs = kwargs.pop('ssl_cert_reqs', None)
    self.ssl_kwargs = {}
    if ssl_certfile:
        self.ssl_kwargs['ssl_certfile'] = ssl_certfile
    if ssl_ca_certs:
        self.ssl_kwargs['ssl_ca_certs'] = ssl_ca_certs
    if ssl_keyfile:
        self.ssl_kwargs['ssl_keyfile'] = ssl_keyfile
    if ssl_cert_reqs:
        self.ssl_kwargs['ssl_cert_reqs'] = ssl_cert_reqs

    # Save the rest of kwargs.
    self.kwargs = kwargs

    # Replace the original dest_mapping
    self.dest_mapping = DestMapping(kwargs.get('ns_set', []),
                                    kwargs.get('ex_ns_set', []),
                                    kwargs.get('dest_mapping', {}))

    # Initialize and set the command helper
    command_helper = CommandHelper(self.dest_mapping)
    for dm in self.doc_managers:
        dm.command_helper = command_helper

    if self.oplog_checkpoint is not None:
        if not os.path.exists(self.oplog_checkpoint):
            info_str = ("MongoConnector: Can't find %s, "
                        "attempting to create an empty progress log" %
                        self.oplog_checkpoint)
            LOG.warning(info_str)
            try:
                # Create oplog progress file
                open(self.oplog_checkpoint, "w").close()
            except IOError as e:
                LOG.critical("MongoConnector: Could not "
                             "create a progress log: %s" % str(e))
                sys.exit(2)
        else:
            if (not os.access(self.oplog_checkpoint, os.W_OK)
                    and not os.access(self.oplog_checkpoint, os.R_OK)):
                LOG.critical("Invalid permissions on %s! Exiting" %
                             (self.oplog_checkpoint))
                sys.exit(2)
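# --- Illustration (not from the source above) --------------------------------
# A minimal, hedged sketch of the namespace wiring the Connector performs: the
# same DestMapping it builds from its ns_set/ex_ns_set/dest_mapping kwargs,
# wrapped in a CommandHelper that every doc manager shares. The import paths
# below are assumptions inferred from the class names used in this code.
from mongo_connector.command_helper import CommandHelper  # assumed path
from mongo_connector.dest_mapping import DestMapping      # assumed path

dest_mapping = DestMapping(['test.test'], [], {'test.test': 'other.test'})
command_helper = CommandHelper(dest_mapping)
# Commands against "test.test" are redirected to the mapped namespace,
# mirroring the assertions in the test_command_helper snippets above.
print(command_helper.map_namespace('test.test'))      # -> 'other.test'
print(command_helper.map_collection('test', 'test'))  # -> ('other', 'test')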
class DocManager(DocManagerBase):
    """ArangoDB implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the
    appropriate actions on ArangoDB.
    """

    def __init__(self, url, auto_commit_interval=None,
                 chunk_size=constants.DEFAULT_MAX_BULK, **kwargs):
        """Verify URL and establish a connection."""
        self.url = url
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = '_key'
        self.chunk_size = chunk_size
        self.kwargs = kwargs
        self.arango = self.create_connection()
        # define meta database and meta collection names
        self.meta_database = "mongodb_meta"
        self.meta_collection = "mongodb_data"
        # check if meta database and meta collection are already present,
        # if not then create both
        if self.meta_database not in self.arango.databases():
            self.meta_database = self.arango.create_database(
                self.meta_database)
            self.meta_database.create_collection(self.meta_collection)
        else:
            self.meta_database = self.arango.db(self.meta_database)
        self.command_helper = CommandHelper()

    def verify_connection(self, connection):
        try:
            connection.verify()
        except ServerConnectionError:
            raise ServerConnectionError(
                "\nSeems that ArangoDB is running with Authentication.\n"
                "Please run the following,\n"
                "connector_arango_auth set, to set\n"
                "connector_arango_auth reset, to reset\n"
                "connector_arango_auth flush, to disable\n"
                "followed by source ~/.bashrc\n"
                "the ArangoDB authentication\n"
                "Refer readme.rst for more details")
        return

    @wrap_exceptions
    def create_connection(self):
        """Creates an ArangoDB connection"""
        # Extract host and port from the URL
        host, port = self.get_host_port(self.url)
        # Extract ArangoDB username and password from environment variables
        arango_username = os.environ.get('USER_ARANGO')
        arango_password = os.environ.get('PASSWD_ARANGO')
        if not arango_username and not arango_password:
            # Create ArangoDB connection
            arango_connection = ArangoClient(host=host, port=port)
            self.verify_connection(arango_connection)
        elif (not arango_username and arango_password) or \
                (arango_username and not arango_password):
            raise Exception("Invalid credentials, ArangoDB username/"
                            "password can't be blank")
        elif arango_username and arango_password:
            # Create ArangoDB connection
            arango_connection = ArangoClient(host=host, port=port,
                                             username=arango_username,
                                             password=arango_password)
            self.verify_connection(arango_connection)
        return arango_connection

    @wrap_exceptions
    def get_host_port(self, address):
        """Extracts host and port from the URL"""
        address_list = address.split(':')
        host = address_list[0]
        port = int(address_list[1])
        return host, port

    @wrap_exceptions
    def check_if_database_exists(self, database):
        """Checks if the database exists"""
        databases = self.arango.databases()
        if database in databases:
            return True
        return False

    @wrap_exceptions
    def check_if_collection_exists(self, database, coll):
        """Checks if the collection exists"""
        if not isinstance(database, arango.database.Database):
            database = self.arango.db(database)
        collections = database.collections()
        for item in collections:
            if item['name'] == coll:
                return True
        return False

    def apply_update(self, doc, update_spec):
        """Performs the necessary update operations on the document and
        returns the updated document
        """
        if "$set" not in update_spec and "$unset" not in update_spec:
            # Don't try to add ns and _ts fields back in from doc
            return update_spec
        return super(DocManager, self).apply_update(doc, update_spec)

    def _db_and_collection(self, namespace):
        """Extracts the database and collection name from a namespace string"""
        return namespace.split('.', 1)

    def stop(self):
        """Stops any running threads"""
        LOG.info(
            "Mongo DocManager Stopped: If you will not target this system "
            "again with mongo-connector then you may drop the database "
            "__mongo_connector, which holds metadata for Mongo Connector.")

    @wrap_exceptions
    def handle_command(self, doc, namespace, timestamp):
        """Handles operations at the database as well as the collection
        level, such as create database, drop database, create collection,
        drop collection and rename collection
        """
        db, _ = self._db_and_collection(namespace)

        if doc.get('dropDatabase'):
            for new_db in self.command_helper.map_db(db):
                self.arango.delete_database(new_db)

        if doc.get('renameCollection'):
            source_namespace = self.command_helper.map_namespace(
                doc['renameCollection'])
            source_db, source_coll = self._db_and_collection(source_namespace)
            target_namespace = self.command_helper.map_namespace(doc['to'])
            target_db, target_coll = self._db_and_collection(target_namespace)
            if source_namespace and target_coll:
                source_db = self.arango.db(source_db)
                source_coll = source_db.collection(source_coll)
                source_coll.rename(target_coll)

        if doc.get('create'):
            new_db, coll = self.command_helper.map_collection(
                db, doc['create'])
            if new_db:
                db_response = self.check_if_database_exists(new_db)
                if not db_response:
                    new_db = self.arango.create_database(new_db)
                coll_response = self.check_if_collection_exists(new_db, coll)
                if not coll_response:
                    if not isinstance(new_db, arango.database.Database):
                        new_db = self.arango.db(new_db)
                    new_db.create_collection(coll)

        if doc.get('drop'):
            new_db, coll = self.command_helper.map_collection(db, doc['drop'])
            new_db = self.arango.db(new_db)
            new_db.delete_collection(coll)

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        """Apply updates given in update_spec to the document whose id
        matches that of doc.
        """
""" document_id = self.pre_process_id(document_id) db, coll = self._db_and_collection(namespace) database = self.arango.db(db) coll = database.collection(coll) meta_collection = self.meta_database.collection(self.meta_collection) meta = { self.unique_key: document_id, "_ts": timestamp, "ns": namespace } meta_cursor = meta_collection.find({ self.unique_key: document_id, "ns": namespace }) if meta_cursor.count(): meta_collection.replace(meta) cursor = coll.find({self.unique_key: document_id}) if cursor.count(): document = cursor.next() updated = self.apply_update(document, update_spec) updated['_id'] = document_id self.upsert(updated, namespace, timestamp) else: LOG.error("The document %s, which you are trying to update \ is missing in ArangoDB" % document_id) @wrap_exceptions def upsert(self, doc, namespace, timestamp): """Update or insert a document into Mongo """ # get database and collection name from namespace database, coll = self._db_and_collection(namespace) doc_ = {"create": coll} self.handle_command(doc_, namespace, timestamp) # get database instance database = self.arango.db(database) # get collection instance coll = database.collection(coll) # covert doc_id to string type from bson.objectid.ObjectId type doc_id = self.pre_process_id(doc.get('_id')) # pop _id from document doc.pop('_id') # get meta_collection instance meta_collection = self.meta_database.collection(self.meta_collection) # create meta for inserting into meta collection meta = {self.unique_key: doc_id, "_ts": timestamp, "ns": namespace} # check if the doc with given doc_id is already present in ArangoDB meta_cursor = meta_collection.find({ self.unique_key: doc_id, "ns": namespace }) if meta_cursor.count(): # replace existing doc meta_collection.replace(meta) else: # insert new doc meta_collection.insert(meta) # update "_id" field's value as "_key" field's value, as ArangoDB \ # keeps "_key" as an unique key across the collection doc.update({self.unique_key: doc_id}) if coll.has(doc_id): # replace existing doc coll.replace(doc) else: # insert new doc coll.insert(doc) def pre_process_id(self, doc_id): """Coverts doc id into string and ultimately into the format required for _key field """ doc_id = str(doc_id) if '.' not in doc_id: doc_id = doc_id + '.' return doc_id @wrap_exceptions def bulk_upsert(self, docs, namespace, timestamp): """Performs bulk insert operations """ dbname, collname = self._db_and_collection(namespace) doc = {"create": collname} self.handle_command(doc, namespace, timestamp) dbname = self.arango.db(dbname) collname = dbname.collection(collname) meta_coll = self.meta_database.collection(self.meta_collection) def iterate_chunks(): more_chunks = True while more_chunks: bulk = [] bulk_meta = [] for i in range(self.chunk_size): try: doc = next(docs) doc_id = doc.get('_id') doc_id = self.pre_process_id(doc_id) doc.pop('_id') doc.update({self.unique_key: doc_id}) bulk.append(doc) bulk_meta.append({ self.unique_key: doc_id, 'ns': namespace, '_ts': timestamp }) except StopIteration: more_chunks = False if i > 0: yield bulk, bulk_meta, collname, meta_coll break if more_chunks: yield bulk, bulk_meta, collname, meta_coll for bulk_op, meta_bulk_op, collname, meta_coll in iterate_chunks(): collname.import_bulk(documents=bulk_op, on_duplicate="replace") meta_coll.import_bulk(documents=meta_bulk_op, on_duplicate="replace") @wrap_exceptions def remove(self, document_id, namespace, timestamp): """Removes document from Mongo The input is a python dictionary that represents a mongo document. 

    @wrap_exceptions
    def remove(self, document_id, namespace, timestamp):
        """Removes a document from ArangoDB.

        The input is a python dictionary that represents a mongo document.
        The document has ns and _ts fields.
        """
        document_id = self.pre_process_id(document_id)
        database, coll = self._db_and_collection(namespace)
        database = self.arango.db(database)
        coll = database.collection(coll)
        meta_collection = self.meta_database.collection(self.meta_collection)
        meta_collection.delete(document_id)
        coll.delete(document_id)

    @wrap_exceptions
    def search(self, start_ts, end_ts):
        """Query ArangoDB for documents in a time range.

        This method is used to find documents that may be in conflict during
        a rollback event in MongoDB.
        """
        meta_coll = self.meta_database.collection(self.meta_collection)
        for doc in meta_coll.find_in_range("_ts", start_ts, end_ts):
            yield doc

    @wrap_exceptions
    def get_last_doc(self):
        """Return the document most recently modified in the target system"""
        query = 'FOR doc IN {} SORT doc._ts DESC LIMIT 1 RETURN doc'.format(
            self.meta_collection)
        result = self.meta_database.aql.execute(query)
        for doc in result:
            return doc
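# --- Usage sketch (not from the source above) ---------------------------------
# A minimal, hedged example of driving the ArangoDB DocManager by hand, the way
# an OplogThread would. The module path "arango_doc_manager" is hypothetical,
# as is an unauthenticated ArangoDB instance listening on localhost:8529.
from arango_doc_manager import DocManager  # hypothetical module path

docman = DocManager('localhost:8529')
# upsert creates the target database/collection on demand via handle_command,
# records metadata in mongodb_meta, and stores the document under _key.
docman.upsert({'_id': 1, 'name': 'ted'}, 'test.test', 1)
# Collection- and database-level commands are routed through command_helper.
docman.handle_command({'drop': 'test'}, 'test.$cmd', 2)
docman.handle_command({'dropDatabase': 1}, 'test.$cmd', 3)
docman.stop()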