def test_command_helper(self):
    """Check CommandHelper's database, namespace and collection mapping."""
    # Databases cannot be merged: two source databases may not be mapped
    # onto the same target database.
    merging = {'a.x': 'c.x', 'b.x': 'c.y'}
    with self.assertRaises(errors.MongoConnectorError):
        CommandHelper(list(merging), merging)

    renames = {'a.x': 'b.x', 'a.y': 'c.y'}
    # 'a.z' is part of the namespace set but has no rename entry.
    included = list(renames) + ['a.z']
    helper = CommandHelper(included, renames)

    # Database level.
    self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    # Namespace level.
    namespace_cases = (
        ('a.x', 'b.x'),
        ('a.z', 'a.z'),
        ('d.x', None),
    )
    for source, expected in namespace_cases:
        self.assertEqual(helper.map_namespace(source), expected)

    # Collection level.
    collection_cases = (
        (('a', 'x'), ('b', 'x')),
        (('a', 'z'), ('a', 'z')),
        (('d', 'x'), (None, None)),
    )
    for (db, coll), expected in collection_cases:
        self.assertEqual(helper.map_collection(db, coll), expected)
def test_command_helper(self):
    """Exercise CommandHelper name mapping driven by a NamespaceConfig."""
    renames = {"a.x": "b.x", "a.y": "c.y"}
    # "a.z" is included in the namespace set but has no rename entry.
    namespace_config = NamespaceConfig(
        namespace_set=list(renames) + ["a.z"],
        namespace_options=renames,
    )
    helper = CommandHelper(namespace_config)

    # Database level.
    mapped_dbs = set(helper.map_db("a"))
    self.assertEqual(mapped_dbs, {"a", "b", "c"})
    self.assertEqual(helper.map_db("d"), [])

    # Namespace level.
    for source, expected in (("a.x", "b.x"), ("a.z", "a.z"), ("d.x", None)):
        self.assertEqual(helper.map_namespace(source), expected)

    # Collection level.
    for source, expected in (
        (("a", "x"), ("b", "x")),
        (("a", "z"), ("a", "z")),
        (("d", "x"), (None, None)),
    ):
        self.assertEqual(helper.map_collection(*source), expected)
def test_command_helper(self):
    """Verify how CommandHelper maps databases, namespaces, collections."""
    renames = {'a.x': 'b.x', 'a.y': 'c.y'}
    included = list(renames)
    included.append('a.z')  # included, but without a rename entry
    config = NamespaceConfig(namespace_set=included,
                             namespace_options=renames)
    helper = CommandHelper(config)

    # map_db
    self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    # map_namespace
    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    # map_collection
    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
def test_command_helper(self):
    """Check CommandHelper construction errors and its name mapping."""
    # Databases cannot be merged.
    conflicting = {'a.x': 'c.x', 'b.x': 'c.y'}
    with self.assertRaises(errors.MongoConnectorError):
        CommandHelper(list(conflicting), conflicting)

    renames = {'a.x': 'b.x', 'a.y': 'c.y'}
    namespace_set = list(renames) + ['a.z']
    helper = CommandHelper(namespace_set, renames)

    # Database level.
    targets_of_a = set(helper.map_db('a'))
    self.assertEqual(targets_of_a, {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    # Namespace level.
    expected_namespaces = [
        ('a.x', 'b.x'),
        ('a.z', 'a.z'),
        ('d.x', None),
    ]
    for source, expected in expected_namespaces:
        self.assertEqual(helper.map_namespace(source), expected)

    # Collection level.
    expected_collections = [
        ('a', 'x', ('b', 'x')),
        ('a', 'z', ('a', 'z')),
        ('d', 'x', (None, None)),
    ]
    for db, coll, expected in expected_collections:
        self.assertEqual(helper.map_collection(db, coll), expected)
def test_command_helper(self):
    """Exercise CommandHelper name mapping driven by a DestMapping."""
    renames = {'a.x': 'b.x', 'a.y': 'c.y'}
    # Replace the origin dest_mapping: 'a.z' is included without a rename
    # entry and the second positional argument is an empty list.
    included = list(renames) + ['a.z']
    helper = CommandHelper(DestMapping(included, [], renames))

    # Database level.
    self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    # Namespace level.
    for source, expected in (('a.x', 'b.x'), ('a.z', 'a.z'), ('d.x', None)):
        self.assertEqual(helper.map_namespace(source), expected)

    # Collection level.
    for db, coll, expected in (
        ('a', 'x', ('b', 'x')),
        ('a', 'z', ('a', 'z')),
        ('d', 'x', (None, None)),
    ):
        self.assertEqual(helper.map_collection(db, coll), expected)
def test_command_helper(self):
    """Check database, namespace and collection mapping of CommandHelper."""
    renames = {'a.x': 'b.x', 'a.y': 'c.y'}
    config_kwargs = {
        # 'a.z' is included but has no rename entry.
        'namespace_set': list(renames) + ['a.z'],
        'namespace_options': renames,
    }
    helper = CommandHelper(NamespaceConfig(**config_kwargs))

    # Database level.
    self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    # Namespace level.
    map_namespace = helper.map_namespace
    self.assertEqual(map_namespace('a.x'), 'b.x')
    self.assertEqual(map_namespace('a.z'), 'a.z')
    self.assertEqual(map_namespace('d.x'), None)

    # Collection level.
    map_collection = helper.map_collection
    self.assertEqual(map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(map_collection('d', 'x'), (None, None))
def test_command_helper(self):
    """Verify CommandHelper mapping results for a small NamespaceConfig."""
    renames = {"a.x": "b.x", "a.y": "c.y"}
    # "a.z" is part of the namespace set but is not renamed.
    included = list(renames) + ["a.z"]
    config = NamespaceConfig(namespace_set=included, namespace_options=renames)
    helper = CommandHelper(config)

    # Database level.
    self.assertEqual(set(helper.map_db("a")), {"a", "b", "c"})
    self.assertEqual(helper.map_db("d"), [])

    # Namespace level.
    namespace_cases = [("a.x", "b.x"), ("a.z", "a.z"), ("d.x", None)]
    for source, expected in namespace_cases:
        self.assertEqual(helper.map_namespace(source), expected)

    # Collection level.
    collection_cases = [
        ("a", "x", ("b", "x")),
        ("a", "z", ("a", "z")),
        ("d", "x", (None, None)),
    ]
    for db, coll, expected in collection_cases:
        self.assertEqual(helper.map_collection(db, coll), expected)
def test_command_helper(self):
    """Check CommandHelper mapping when configured through a DestMapping."""
    renames = {'a.x': 'b.x', 'a.y': 'c.y'}

    # Replace the origin dest_mapping.
    namespace_set = list(renames)
    namespace_set.append('a.z')  # included, but without a rename entry
    dest_mapping = DestMapping(namespace_set, [], renames)
    helper = CommandHelper(dest_mapping)

    # map_db
    dbs_for_a = set(helper.map_db('a'))
    self.assertEqual(dbs_for_a, {'a', 'b', 'c'})
    self.assertEqual(helper.map_db('d'), [])

    # map_namespace
    self.assertEqual(helper.map_namespace('a.x'), 'b.x')
    self.assertEqual(helper.map_namespace('a.z'), 'a.z')
    self.assertEqual(helper.map_namespace('d.x'), None)

    # map_collection
    self.assertEqual(helper.map_collection('a', 'x'), ('b', 'x'))
    self.assertEqual(helper.map_collection('a', 'z'), ('a', 'z'))
    self.assertEqual(helper.map_collection('d', 'x'), (None, None))
class DocManager(DocManagerBase):
    """ArangoDB implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions
    on ArangoDB.

    For every replicated document a small metadata record (``_key``, ``ns``
    and ``_ts``) is kept in a dedicated metadata collection; ``search`` and
    ``get_last_doc`` read from that collection.
    """

    # Names of the database and collection that hold replication metadata.
    _META_DATABASE_NAME = "mongodb_meta"
    _META_COLLECTION_NAME = "mongodb_data"

    def __init__(self, url, auto_commit_interval=None,
                 chunk_size=constants.DEFAULT_MAX_BULK, **kwargs):
        """Connect to ArangoDB and make sure the metadata store exists.

        Args:
            url: ArangoDB address in ``host:port`` form.
            auto_commit_interval: stored on the instance; not used by the
                code in this class.
            chunk_size: maximum number of documents sent per bulk import.
            **kwargs: extra options, stored unchanged in ``self.kwargs``.
        """
        self.url = url
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = '_key'
        self.chunk_size = chunk_size
        self.kwargs = kwargs
        self.arango = self.create_connection()

        # After construction ``meta_database`` is a database handle while
        # ``meta_collection`` stays a plain collection name.
        self.meta_collection = self._META_COLLECTION_NAME
        if self._META_DATABASE_NAME in self.arango.databases():
            self.meta_database = self.arango.db(self._META_DATABASE_NAME)
        else:
            self.meta_database = self.arango.create_database(
                self._META_DATABASE_NAME)

        # The database can exist without the collection (for example when
        # a previous start-up failed between the two creation calls), so
        # the collection is checked separately.
        meta_collection_present = any(
            item['name'] == self.meta_collection
            for item in self.meta_database.collections())
        if not meta_collection_present:
            self.meta_database.create_collection(self.meta_collection)

        self.command_helper = CommandHelper()

    def verify_connection(self, connection):
        """Verify ``connection`` and explain how to configure credentials.

        Raises:
            ServerConnectionError: when ``connection.verify()`` raises it;
                the error is re-raised with setup instructions as message.
        """
        try:
            connection.verify()
        except ServerConnectionError:
            raise ServerConnectionError(
                "\nSeems that ArangoDB is running with Authentication.\n"
                "Please run the following,\n"
                "connector_arango_auth set, to set\n"
                "connector_arango_auth reset, to reset\n"
                "connector_arango_auth flush, to disable\n"
                "followed by source ~/.bashrc\n"
                "the ArangoDB authentication\n"
                "Refer readme.rst for more details")

    @wrap_exceptions
    def create_connection(self):
        """Create and verify the ArangoDB client for ``self.url``.

        Credentials are read from the ``USER_ARANGO`` and ``PASSWD_ARANGO``
        environment variables.  Either both or neither must be set.

        Returns:
            The verified ``ArangoClient`` instance.

        Raises:
            Exception: when only one of the two variables is set.
        """
        host, port = self.get_host_port(self.url)

        arango_username = os.environ.get('USER_ARANGO')
        arango_password = os.environ.get('PASSWD_ARANGO')

        # Exactly one of the two credentials being present is a
        # configuration error.
        if bool(arango_username) != bool(arango_password):
            raise Exception("Invalid credentials, ArangoDB username/"
                            "password can't be blank")

        if arango_username:
            arango_connection = ArangoClient(host=host, port=port,
                                             username=arango_username,
                                             password=arango_password)
        else:
            arango_connection = ArangoClient(host=host, port=port)

        self.verify_connection(arango_connection)
        return arango_connection

    @wrap_exceptions
    def get_host_port(self, address):
        """Split a ``host:port`` address into ``(host, int(port))``."""
        address_list = address.split(':')
        host = address_list[0]
        port = int(address_list[1])
        return host, port

    @wrap_exceptions
    def check_if_database_exists(self, database):
        """Return True when a database named ``database`` exists."""
        return database in self.arango.databases()

    @wrap_exceptions
    def check_if_collection_exists(self, database, coll):
        """Return True when ``database`` contains a collection ``coll``.

        ``database`` may be a database name or a database handle.
        """
        if not isinstance(database, arango.database.Database):
            database = self.arango.db(database)
        return any(item['name'] == coll for item in database.collections())

    def apply_update(self, doc, update_spec):
        """Apply ``update_spec`` to ``doc`` and return the updated document.

        A spec without ``$set`` and ``$unset`` is returned as-is instead of
        being merged into ``doc``.
        """
        if "$set" not in update_spec and "$unset" not in update_spec:
            # Don't try to add ns and _ts fields back in from doc
            return update_spec
        return super(DocManager, self).apply_update(doc, update_spec)

    def _db_and_collection(self, namespace):
        """Split ``namespace`` at its first dot into ``[db, collection]``."""
        return namespace.split('.', 1)

    def stop(self):
        """Log a shutdown notice; this class starts no threads itself."""
        LOG.info(
            "ArangoDB DocManager Stopped: If you will not target this system "
            "again with mongo-connector then you may drop the database "
            "%s, which holds metadata for Mongo Connector.",
            self._META_DATABASE_NAME)

    @wrap_exceptions
    def handle_command(self, doc, namespace, timestamp):
        """Replay a database- or collection-level command on ArangoDB.

        Handles ``dropDatabase``, ``renameCollection`` (with ``to``),
        ``create`` and ``drop`` keys of ``doc``.  Names are translated
        through ``self.command_helper``; a command whose target maps to
        nothing is skipped.
        """
        db, _ = self._db_and_collection(namespace)

        if doc.get('dropDatabase'):
            for new_db in self.command_helper.map_db(db):
                self.arango.delete_database(new_db)

        if doc.get('renameCollection'):
            source_namespace = self.command_helper.map_namespace(
                doc['renameCollection'])
            target_namespace = self.command_helper.map_namespace(doc['to'])
            # Check the mapping results before splitting them: an unmapped
            # namespace comes back as None and cannot be split.
            if source_namespace and target_namespace:
                source_db, source_coll = self._db_and_collection(
                    source_namespace)
                _, target_coll = self._db_and_collection(target_namespace)
                source_collection = self.arango.db(source_db).collection(
                    source_coll)
                source_collection.rename(target_coll)

        if doc.get('create'):
            new_db, coll = self.command_helper.map_collection(
                db, doc['create'])
            if new_db:
                if not self.check_if_database_exists(new_db):
                    # Create the mapped database, not the source one.
                    self.arango.create_database(new_db)
                if not self.check_if_collection_exists(new_db, coll):
                    self.arango.db(new_db).create_collection(coll)

        if doc.get('drop'):
            new_db, coll = self.command_helper.map_collection(db, doc['drop'])
            if new_db:
                self.arango.db(new_db).delete_collection(coll)

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        """Apply ``update_spec`` to the stored document ``document_id``.

        The metadata record is refreshed when present.  If the document
        itself is not found in ArangoDB an error is logged and nothing is
        written to the data collection.
        """
        document_id = self.pre_process_id(document_id)
        db, coll = self._db_and_collection(namespace)
        collection = self.arango.db(db).collection(coll)

        meta_collection = self.meta_database.collection(self.meta_collection)
        meta = {
            self.unique_key: document_id,
            "_ts": timestamp,
            "ns": namespace
        }
        meta_cursor = meta_collection.find({
            self.unique_key: document_id,
            "ns": namespace
        })
        if meta_cursor.count():
            meta_collection.replace(meta)

        cursor = collection.find({self.unique_key: document_id})
        if cursor.count():
            document = cursor.next()
            updated = self.apply_update(document, update_spec)
            # upsert() reads the identifier from the '_id' field.
            updated['_id'] = document_id
            self.upsert(updated, namespace, timestamp)
        else:
            LOG.error(
                "The document %s, which you are trying to update "
                "is missing in ArangoDB", document_id)

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Insert ``doc`` into ArangoDB, replacing any existing version.

        The target collection is created on demand.  ``doc`` is modified
        in place: its ``_id`` is removed and stored, in string form, under
        ``_key``.
        """
        database, coll = self._db_and_collection(namespace)

        # Make sure the target database and collection exist.
        self.handle_command({"create": coll}, namespace, timestamp)

        collection = self.arango.db(database).collection(coll)

        # ArangoDB identifies documents by '_key', so '_id' is converted
        # and moved there.
        doc_id = self.pre_process_id(doc.get('_id'))
        doc.pop('_id')

        meta_collection = self.meta_database.collection(self.meta_collection)
        meta = {self.unique_key: doc_id, "_ts": timestamp, "ns": namespace}
        meta_cursor = meta_collection.find({
            self.unique_key: doc_id,
            "ns": namespace
        })
        if meta_cursor.count():
            meta_collection.replace(meta)
        else:
            meta_collection.insert(meta)

        doc.update({self.unique_key: doc_id})
        if collection.has(doc_id):
            collection.replace(doc)
        else:
            collection.insert(doc)

    def pre_process_id(self, doc_id):
        """Convert ``doc_id`` to the string form stored in ``_key``.

        The id is stringified and a trailing ``.`` is appended when the
        string contains no dot.
        """
        doc_id = str(doc_id)
        if '.' not in doc_id:
            doc_id = doc_id + '.'
        return doc_id

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        """Import ``docs`` in batches of at most ``self.chunk_size``.

        ``docs`` may be any iterable of documents.  Each document is
        modified in place (``_id`` is replaced by ``_key``) and a metadata
        record is imported alongside it.  Existing documents with the same
        key are replaced.
        """
        dbname, collname = self._db_and_collection(namespace)
        self.handle_command({"create": collname}, namespace, timestamp)

        collection = self.arango.db(dbname).collection(collname)
        meta_coll = self.meta_database.collection(self.meta_collection)

        def flush(bulk, bulk_meta):
            # Send one batch of documents followed by its metadata.
            collection.import_bulk(documents=bulk, on_duplicate="replace")
            meta_coll.import_bulk(documents=bulk_meta,
                                  on_duplicate="replace")

        bulk = []
        bulk_meta = []
        for doc in docs:
            doc_id = self.pre_process_id(doc.get('_id'))
            doc.pop('_id')
            doc.update({self.unique_key: doc_id})
            bulk.append(doc)
            bulk_meta.append({
                self.unique_key: doc_id,
                'ns': namespace,
                '_ts': timestamp
            })
            if len(bulk) >= self.chunk_size:
                flush(bulk, bulk_meta)
                bulk = []
                bulk_meta = []

        # Send whatever is left over from the last, partial batch.
        if bulk:
            flush(bulk, bulk_meta)

    @wrap_exceptions
    def remove(self, document_id, namespace, timestamp):
        """Delete ``document_id`` and its metadata record from ArangoDB."""
        document_id = self.pre_process_id(document_id)
        database, coll = self._db_and_collection(namespace)
        collection = self.arango.db(database).collection(coll)

        meta_collection = self.meta_database.collection(self.meta_collection)
        meta_collection.delete(document_id)
        collection.delete(document_id)

    @wrap_exceptions
    def search(self, start_ts, end_ts):
        """Query ArangoDB for documents in a time range.

        This method is used to find documents that may be in conflict
        during a rollback event in MongoDB.  It yields the metadata
        records whose ``_ts`` lies in the range passed to
        ``find_in_range``.
        """
        meta_coll = self.meta_database.collection(self.meta_collection)
        for doc in meta_coll.find_in_range("_ts", start_ts, end_ts):
            yield doc

    @wrap_exceptions
    def get_last_doc(self):
        """Return the metadata record with the highest ``_ts``.

        Returns None when the metadata collection is empty.
        """
        query = 'FOR doc IN {} SORT doc._ts DESC LIMIT 1 RETURN doc'.format(
            self.meta_collection)
        result = self.meta_database.aql.execute(query)
        for doc in result:
            return doc
        return None