Esempio n. 1
0
    def __init__(self,
                 url,
                 auto_commit_interval=None,
                 chunk_size=constants.DEFAULT_MAX_BULK,
                 **kwargs):
        """Store the settings, connect to ArangoDB and set up meta storage.

        The connection comes from ``self.create_connection()``. The
        ``mongodb_meta`` database is looked up on it; when it is missing, the
        database is created together with its ``mongodb_data`` collection.
        """
        self.url = url
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = '_key'
        self.chunk_size = chunk_size
        self.kwargs = kwargs

        self.arango = self.create_connection()

        # Names of the meta database and of the collection inside it.
        meta_db_name = "mongodb_meta"
        self.meta_database = meta_db_name
        self.meta_collection = "mongodb_data"

        # From here on ``self.meta_database`` holds the object returned by
        # the connection instead of the plain name.
        if meta_db_name in self.arango.databases():
            self.meta_database = self.arango.db(meta_db_name)
        else:
            # A newly created database also gets its meta collection.
            self.meta_database = self.arango.create_database(meta_db_name)
            self.meta_database.create_collection(self.meta_collection)

        self.command_helper = CommandHelper()
    def test_command_helper(self):
        """Check CommandHelper's validation and its mapping lookups."""
        # Databases cannot be merged: two source databases pointing at the
        # same target database must be rejected by the constructor.
        merged = {
            'a.x': 'c.x',
            'b.x': 'c.y'
        }
        with self.assertRaises(errors.MongoConnectorError):
            CommandHelper(list(merged), merged)

        mapping = {
            'a.x': 'b.x',
            'a.y': 'c.y'
        }
        helper = CommandHelper(list(mapping) + ['a.z'], mapping)

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        namespace_cases = (
            ('a.x', 'b.x'),
            ('a.z', 'a.z'),
            ('d.x', None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (('a', 'x'), ('b', 'x')),
            (('a', 'z'), ('a', 'z')),
            (('d', 'x'), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
Esempio n. 3
0
    def test_commands(self):
        """Exercise create, drop and dropDatabase via handle_command."""
        cmd_args = ("test.$cmd", 1)
        self.elastic_doc.command_helper = CommandHelper()

        def run(command):
            # Every command in this test uses the same namespace/timestamp.
            self.elastic_doc.handle_command(command, *cmd_args)

        def refresh():
            retry_until_ok(self.elastic_conn.indices.refresh, index="")

        def search_test2():
            # Match-all search on index "test", doc type "test2".
            match_all = {"query": {"match_all": {}}}
            return list(
                self.elastic_doc._stream_search(index="test",
                                                doc_type="test2",
                                                body=match_all))

        run({"create": "test2"})
        refresh()
        self.assertIn("test2", self._mappings("test"))

        names = ("ted", "marsha", "nikolas")
        docs = [{"_id": doc_id, "name": name}
                for doc_id, name in enumerate(names)]
        for doc in docs:
            self.elastic_doc.upsert(doc, "test.test2", 1)

        # Commit upserted docs as they are in buffer
        self.elastic_doc.commit()

        found = search_test2()
        for doc in docs:
            self.assertTrue(doc in found)

        run({"drop": "test2"})
        refresh()
        remaining = search_test2()
        self.assertEqual(0, len(remaining))

        run({"create": "test2"})
        run({"create": "test3"})
        refresh()
        run({"dropDatabase": 1})
        refresh()
        self.assertNotIn("test", self._indices())
        self.assertNotIn("test2", self._mappings())
        self.assertNotIn("test3", self._mappings())
Esempio n. 4
0
 def initOplogThread(self, namespace_set=None, dest_mapping=None):
     """Create a command-logging doc manager and start an OplogThread.

     ``namespace_set`` and ``dest_mapping`` are handed to both the
     CommandHelper and the OplogThread. When omitted they default to a
     fresh empty list / dict for each call, so the containers are never
     shared between calls (the previous ``[]`` / ``{}`` defaults were
     single objects reused by every call).
     """
     if namespace_set is None:
         namespace_set = []
     if dest_mapping is None:
         dest_mapping = {}

     self.docman = CommandLoggerDocManager()
     self.docman.command_helper = CommandHelper(namespace_set, dest_mapping)
     self.opman = OplogThread(primary_client=self.primary_conn,
                              doc_managers=(self.docman, ),
                              oplog_progress_dict=self.oplog_progress,
                              namespace_set=namespace_set,
                              dest_mapping=dest_mapping,
                              collection_dump=False)
     self.opman.start()
Esempio n. 5
0
    def initOplogThread(self, namespace_set=None):
        """Start an OplogThread that reports to a CommandLoggerDocManager.

        A single NamespaceConfig built from ``namespace_set`` is given to
        both the doc manager's CommandHelper and the OplogThread.
        """
        self.docman = CommandLoggerDocManager()
        shared_config = NamespaceConfig(namespace_set=namespace_set)
        self.docman.command_helper = CommandHelper(shared_config)

        thread_options = dict(
            primary_client=self.primary_conn,
            doc_managers=(self.docman, ),
            oplog_progress_dict=self.oplog_progress,
            namespace_config=shared_config,
            collection_dump=False,
        )
        self.opman = OplogThread(**thread_options)
        self.opman.start()
    def test_commands(self):
        """Exercise create, drop and dropDatabase via handle_command."""
        cmd_args = ('test.$cmd', 1)
        self.elastic_doc.command_helper = CommandHelper()

        def run(command):
            # Every command in this test uses the same namespace/timestamp.
            self.elastic_doc.handle_command(command, *cmd_args)

        def search_test2():
            # Match-all search on index "test", doc type "test2".
            match_all = {"query": {"match_all": {}}}
            return list(
                self.elastic_doc._stream_search(index="test",
                                                doc_type='test2',
                                                body=match_all))

        run({'create': 'test2'})
        time.sleep(1)
        self.assertIn('test2', self._mappings('test'))

        names = ("ted", "marsha", "nikolas")
        docs = [{"_id": doc_id, "name": name}
                for doc_id, name in enumerate(names)]
        for doc in docs:
            self.elastic_doc.upsert(doc, 'test.test2', 1)

        found = search_test2()
        for doc in docs:
            self.assertTrue(doc in found)

        run({'drop': 'test2'})
        time.sleep(3)
        remaining = search_test2()
        self.assertEqual(0, len(remaining))

        run({'create': 'test2'})
        run({'create': 'test3'})
        time.sleep(1)
        run({'dropDatabase': 1})
        time.sleep(1)
        self.assertNotIn('test', self._indices())
        self.assertNotIn('test2', self._mappings())
        self.assertNotIn('test3', self._mappings())
Esempio n. 7
0
    def test_commands(self):
        """Run commands through a doc manager that uses namespace mapping.

        Note that mongo-connector does not currently support commands after
        renaming a database.
        """
        options = {
            'test.test': 'test.othertest',
            'test.drop': 'dropped.collection'
        }
        namespace_config = NamespaceConfig(
            namespace_set=['test.test', 'test.test2', 'test.drop'],
            namespace_options=options)
        self.choosy_docman.command_helper = CommandHelper(namespace_config)

        def test_collections():
            # Current collection names of the target "test" database.
            return self.mongo_conn['test'].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        try:
            self.choosy_docman.handle_command({'create': 'test'}, *TESTARGS)
            self.assertIn('othertest', test_collections())

            rename = {
                'renameCollection': 'test.test',
                'to': 'test.test2'
            }
            self.choosy_docman.handle_command(rename, 'admin.$cmd', 1)
            self.assertNotIn('othertest', test_collections())
            self.assertIn('test2', test_collections())

            self.choosy_docman.handle_command(
                {'drop': 'test2'}, 'test.$cmd', 1)
            self.assertNotIn('test2', test_collections())

            # WiredTiger drops the database when the last collection is
            # dropped, so recreate it before testing dropDatabase.
            if 'test' not in databases():
                self.choosy_docman.handle_command(
                    {'create': 'test'}, *TESTARGS)
            self.assertIn('test', databases())
            self.choosy_docman.handle_command(
                {'dropDatabase': 1}, 'test.$cmd', 1)
            self.assertNotIn('test', databases())

            # Briefly test mapped database name with dropDatabase command.
            self.mongo_conn.dropped.collection.insert_one({'a': 1})
            self.assertIn('dropped', databases())
            self.choosy_docman.handle_command(
                {'dropDatabase': 1}, 'test.$cmd', 1)
            self.assertNotIn('dropped', databases())
        finally:
            self.mongo_conn.drop_database('test')
Esempio n. 8
0
    def test_command_helper(self):
        """Check CommandHelper's db, namespace and collection mappings."""
        mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
        config = NamespaceConfig(namespace_set=list(mapping) + ['a.z'],
                                 namespace_options=mapping)
        helper = CommandHelper(config)

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        namespace_cases = (
            ('a.x', 'b.x'),
            ('a.z', 'a.z'),
            ('d.x', None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (('a', 'x'), ('b', 'x')),
            (('a', 'z'), ('a', 'z')),
            (('d', 'x'), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
    def test_command_helper(self):
        """Check CommandHelper's db, namespace and collection mappings."""
        mapping = {"a.x": "b.x", "a.y": "c.y"}
        config = NamespaceConfig(namespace_set=list(mapping) + ["a.z"],
                                 namespace_options=mapping)
        helper = CommandHelper(config)

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db("a")), {"a", "b", "c"})
        self.assertEqual(helper.map_db("d"), [])

        namespace_cases = (
            ("a.x", "b.x"),
            ("a.z", "a.z"),
            ("d.x", None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (("a", "x"), ("b", "x")),
            (("a", "z"), ("a", "z")),
            (("d", "x"), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
Esempio n. 10
0
    def test_command_helper(self):
        """Check CommandHelper lookups when built from a DestMapping."""
        mapping = {'a.x': 'b.x', 'a.y': 'c.y'}

        # The DestMapping takes the place of the original dest_mapping dict;
        # its arguments are the namespace list, an empty list and the
        # mapping itself.
        included = list(mapping) + ['a.z']
        helper = CommandHelper(DestMapping(included, [], mapping))

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        namespace_cases = (
            ('a.x', 'b.x'),
            ('a.z', 'a.z'),
            ('d.x', None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (('a', 'x'), ('b', 'x')),
            (('a', 'z'), ('a', 'z')),
            (('d', 'x'), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
Esempio n. 11
0
    def test_command_helper(self):
        """Check CommandHelper's validation and its mapping lookups."""
        # Databases cannot be merged: two source databases pointing at the
        # same target database must be rejected by the constructor.
        merged = {'a.x': 'c.x', 'b.x': 'c.y'}
        with self.assertRaises(errors.MongoConnectorError):
            CommandHelper(list(merged), merged)

        mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
        namespaces = list(mapping) + ['a.z']
        helper = CommandHelper(namespaces, mapping)

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        namespace_cases = (
            ('a.x', 'b.x'),
            ('a.z', 'a.z'),
            ('d.x', None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (('a', 'x'), ('b', 'x')),
            (('a', 'z'), ('a', 'z')),
            (('d', 'x'), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
Esempio n. 12
0
    def initOplogThread(self,
                        namespace_set=None,
                        ex_namespace_set=None,
                        dest_mapping=None):
        """Create a command-logging doc manager and start an OplogThread.

        The three arguments are combined into a DestMapping that is given to
        both the CommandHelper and the OplogThread; the two namespace lists
        are additionally passed to the OplogThread as ``ns_set`` and
        ``ex_ns_set``. Omitted arguments default to a fresh empty list /
        dict for each call, so the containers are never shared between calls
        (the previous ``[]`` / ``{}`` defaults were single objects reused by
        every call).
        """
        if namespace_set is None:
            namespace_set = []
        if ex_namespace_set is None:
            ex_namespace_set = []
        if dest_mapping is None:
            dest_mapping = {}

        self.docman = CommandLoggerDocManager()
        # Replace the origin dest_mapping
        self.dest_mapping_stru = DestMapping(namespace_set, ex_namespace_set,
                                             dest_mapping)

        self.docman.command_helper = CommandHelper(self.dest_mapping_stru)
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(self.docman, ),
                                 oplog_progress_dict=self.oplog_progress,
                                 dest_mapping_stru=self.dest_mapping_stru,
                                 ns_set=namespace_set,
                                 ex_ns_set=ex_namespace_set,
                                 collection_dump=False)
        self.opman.start()
    def test_commands(self):
        """Check that drop and dropDatabase leave no searchable documents."""
        self.docman.command_helper = CommandHelper()

        def count_ns(ns):
            # Number of results returned by the query "ns:<ns>".
            return len(list(self._search("ns:%s" % ns)))

        self.docman.upsert({'_id': '1', 'test': 'data'}, *TESTARGS)
        self.assertEqual(count_ns("test.test"), 1)

        self.docman.handle_command({'drop': 'test'}, *TESTARGS)
        time.sleep(1)
        self.assertEqual(count_ns("test.test"), 0)

        # One document each in test.test2 and test.test3; the suffix doubles
        # as document id and timestamp.
        for suffix in ('2', '3'):
            self.docman.upsert({'_id': suffix, 'test': 'data'},
                               'test.test' + suffix, suffix)
        self.docman.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
        time.sleep(1)
        for namespace in ("test.test2", "test.test3"):
            self.assertEqual(count_ns(namespace), 0)
Esempio n. 14
0
    def buffer_and_drop(self):
        """Insert document and drop collection while doc is in buffer.

        Auto-commit is disabled (``auto_commit_interval = None``) for the
        duration of the check and reset to 0 afterwards. The reset happens
        in a ``finally`` clause so a failing assertion does not leave the
        doc manager with auto-commit switched off.
        """

        self.elastic_doc.command_helper = CommandHelper()

        self.elastic_doc.auto_commit_interval = None
        try:
            index = "test3"
            doc_type = "foo"
            cmd_args = ('%s.%s' % (index, doc_type), 1)

            doc_id = 1
            doc = {"_id": doc_id, "name": "bar"}
            self.elastic_doc.upsert(doc, *cmd_args)

            self.elastic_doc.handle_command({'drop': doc_type}, *cmd_args)
            retry_until_ok(self.elastic_conn.indices.refresh, index="")

            # Commit should be called before command has been handled
            # Which means that buffer should be empty
            self.assertFalse(self.elastic_doc.BulkBuffer.get_buffer())

            # After drop, below search should return no results
            res = list(
                self.elastic_doc._stream_search(index=index,
                                                doc_type=doc_type,
                                                body={"query": {
                                                    "match_all": {}
                                                }}))
            self.assertFalse(res)

            # Test dropDatabase as well
            # Firstly add document to database again
            # This time update doc as well
            self.elastic_doc.upsert(doc, *cmd_args)
            update_spec = {"$set": {"name": "foo2"}}
            self.elastic_doc.update(doc_id, update_spec, *cmd_args)
            self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
            retry_until_ok(self.elastic_conn.indices.refresh, index="")
            self.assertFalse(self.elastic_doc.BulkBuffer.get_buffer())
            self.assertNotIn(index, self._mappings())
        finally:
            # set auto_commit_interval back to 0, also when a check failed
            self.elastic_doc.auto_commit_interval = 0
    def test_commands(self):
        """Run create, rename, drop and dropDatabase against MongoDB."""
        self.mongo_doc.command_helper = CommandHelper()

        def test_collections():
            # Current collection names of the target "test" database.
            return self.mongo_conn['test'].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        # Creating the collection makes it visible in the target.
        self.mongo_doc.handle_command({'create': 'test'}, *TESTARGS)
        self.assertIn('test', test_collections())

        rename = {'renameCollection': 'test.test', 'to': 'test.test2'}
        self.mongo_doc.handle_command(rename, 'admin.$cmd', 1)
        self.assertNotIn('test', test_collections())
        self.assertIn('test2', test_collections())

        self.mongo_doc.handle_command({'drop': 'test2'}, 'test.$cmd', 1)
        self.assertNotIn('test2', test_collections())

        self.assertIn('test', databases())
        self.mongo_doc.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('test', databases())
Esempio n. 16
0
    def test_commands(self):
        """Exercise create, drop and dropDatabase via handle_command."""
        cmd_args = ('test.$cmd', 1)
        self.elastic_doc.command_helper = CommandHelper()

        def run(command):
            # Handle the command, then pause for a second before the
            # following check or command.
            self.elastic_doc.handle_command(command, *cmd_args)
            time.sleep(1)

        run({'create': 'test2'})
        self.assertIn('test2', self._mappings('test'))

        run({'drop': 'test2'})
        self.assertNotIn('test2', self._mappings('test'))

        # The first create is not followed by a pause of its own.
        self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
        run({'create': 'test3'})
        run({'dropDatabase': 1})
        self.assertNotIn('test', self._indices())
        for mapping_name in ('test2', 'test3'):
            self.assertNotIn(mapping_name, self._mappings())
Esempio n. 17
0
    def test_commands(self):
        """Run commands through a doc manager that uses namespace mapping.

        Note that mongo-connector does not currently support commands after
        renaming a database.
        """
        options = {
            "test.test": "test.othertest",
            "test.drop": "dropped.collection",
        }
        namespace_config = NamespaceConfig(
            namespace_set=["test.test", "test.test2", "test.drop"],
            namespace_options=options,
        )
        self.choosy_docman.command_helper = CommandHelper(namespace_config)

        def test_collections():
            # Current collection names of the target "test" database.
            return self.mongo_conn["test"].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        try:
            self.choosy_docman.handle_command({"create": "test"}, *TESTARGS)
            self.assertIn("othertest", test_collections())

            rename = {"renameCollection": "test.test", "to": "test.test2"}
            self.choosy_docman.handle_command(rename, "admin.$cmd", 1)
            self.assertNotIn("othertest", test_collections())
            self.assertIn("test2", test_collections())

            self.choosy_docman.handle_command(
                {"drop": "test2"}, "test.$cmd", 1
            )
            self.assertNotIn("test2", test_collections())

            # WiredTiger drops the database when the last collection is
            # dropped, so recreate it before testing dropDatabase.
            if "test" not in databases():
                self.choosy_docman.handle_command(
                    {"create": "test"}, *TESTARGS
                )
            self.assertIn("test", databases())
            self.choosy_docman.handle_command(
                {"dropDatabase": 1}, "test.$cmd", 1
            )
            self.assertNotIn("test", databases())

            # Briefly test mapped database name with dropDatabase command.
            self.mongo_conn.dropped.collection.insert_one({"a": 1})
            self.assertIn("dropped", databases())
            self.choosy_docman.handle_command(
                {"dropDatabase": 1}, "test.$cmd", 1
            )
            self.assertNotIn("dropped", databases())
        finally:
            self.mongo_conn.drop_database("test")
    def test_command_helper(self):
        """Check CommandHelper's db, namespace and collection mappings."""
        mapping = {"a.x": "b.x", "a.y": "c.y"}
        namespaces = list(mapping) + ["a.z"]
        helper = CommandHelper(
            NamespaceConfig(namespace_set=namespaces,
                            namespace_options=mapping)
        )

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db("a")), {"a", "b", "c"})
        self.assertEqual(helper.map_db("d"), [])

        namespace_cases = (
            ("a.x", "b.x"),
            ("a.z", "a.z"),
            ("d.x", None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (("a", "x"), ("b", "x")),
            (("a", "z"), ("a", "z")),
            (("d", "x"), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
    def test_command_helper(self):
        """Check CommandHelper's db, namespace and collection mappings."""
        mapping = {
            'a.x': 'b.x',
            'a.y': 'c.y'
        }
        namespaces = list(mapping) + ['a.z']
        config = NamespaceConfig(namespace_set=namespaces,
                                 namespace_options=mapping)
        helper = CommandHelper(config)

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        namespace_cases = (
            ('a.x', 'b.x'),
            ('a.z', 'a.z'),
            ('d.x', None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (('a', 'x'), ('b', 'x')),
            (('a', 'z'), ('a', 'z')),
            (('d', 'x'), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
Esempio n. 20
0
    def test_commands(self):
        """Run commands through a doc manager that uses namespace mapping.

        Note that mongo-connector does not currently support commands after
        renaming a database.
        """
        renames = {
            'test.test': 'test.othertest',
            'test.drop': 'dropped.collection'
        }
        self.mongo_doc.command_helper = CommandHelper(
            namespace_set=['test.test', 'test.test2', 'test.drop'],
            dest_mapping=renames)

        def test_collections():
            # Current collection names of the target "test" database.
            return self.mongo_conn['test'].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        try:
            self.mongo_doc.handle_command({'create': 'test'}, *TESTARGS)
            self.assertIn('othertest', test_collections())

            rename = {'renameCollection': 'test.test', 'to': 'test.test2'}
            self.mongo_doc.handle_command(rename, 'admin.$cmd', 1)
            self.assertNotIn('othertest', test_collections())
            self.assertIn('test2', test_collections())

            self.mongo_doc.handle_command({'drop': 'test2'}, 'test.$cmd', 1)
            self.assertNotIn('test2', test_collections())

            self.assertIn('test', databases())
            self.mongo_doc.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
            self.assertNotIn('test', databases())

            # Briefly test mapped database name with dropDatabase command.
            self.mongo_conn.dropped.collection.insert({'a': 1})
            self.assertIn('dropped', databases())
            self.mongo_doc.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
            self.assertNotIn('dropped', databases())
        finally:
            self.mongo_conn.drop_database('test')
    def test_command_helper(self):
        """Check CommandHelper lookups when built from a DestMapping."""
        mapping = {
            'a.x': 'b.x',
            'a.y': 'c.y'
        }

        # The DestMapping takes the place of the original dest_mapping dict;
        # its arguments are the namespace list, an empty list and the
        # mapping itself.
        included = list(mapping) + ['a.z']
        dest_mapping_stru = DestMapping(included, [], mapping)
        helper = CommandHelper(dest_mapping_stru)

        # map_db: only the members are compared, not their order.
        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        namespace_cases = (
            ('a.x', 'b.x'),
            ('a.z', 'a.z'),
            ('d.x', None),
        )
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        collection_cases = (
            (('a', 'x'), ('b', 'x')),
            (('a', 'z'), ('a', 'z')),
            (('d', 'x'), (None, None)),
        )
        for (db, coll), expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
Esempio n. 22
0
    def __init__(self, mongo_address, doc_managers=None, **kwargs):
        """Set up connector state, doc managers and the oplog progress file.

        ``auth_key``, ``auth_username``, ``oplog_checkpoint`` and the four
        ``ssl_*`` options are removed from ``kwargs`` as they are consumed;
        ``tz_aware``, ``ns_set``, ``ex_ns_set`` and ``dest_mapping`` are only
        read. The (mutated) ``kwargs`` dict is kept as ``self.kwargs``.

        When a checkpoint path is configured and the file does not exist, an
        empty one is created. The process exits with status 2 if that fails,
        or if an existing file is neither readable nor writable.
        """
        super(Connector, self).__init__()

        # can_run is set to false when we join the thread
        self.can_run = True

        # The signal that caused the connector to stop or None
        self.signal = None

        # main address - either mongos for sharded setups or a primary
        # otherwise
        self.address = mongo_address

        # connection to the main address
        self.main_conn = None

        # List of DocManager instances; fall back to the simulator when the
        # caller supplied none.
        if not doc_managers:
            LOG.warning('No doc managers specified, using simulator.')
            doc_managers = (simulator.DocManager(),)
        self.doc_managers = doc_managers

        # Password and username for authentication
        self.auth_key = kwargs.pop('auth_key', None)
        self.auth_username = kwargs.pop('auth_username', None)

        # The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = kwargs.pop('oplog_checkpoint',
                                           'oplog.timestamp')

        # The set of OplogThreads created
        self.shard_set = {}

        # Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        # Timezone awareness
        self.tz_aware = kwargs.get('tz_aware', False)

        # SSL keyword arguments to MongoClient: every option is removed from
        # kwargs, but only the ones with a truthy value are kept.
        self.ssl_kwargs = {}
        ssl_options = ('ssl_certfile', 'ssl_ca_certs',
                       'ssl_keyfile', 'ssl_cert_reqs')
        for option in ssl_options:
            value = kwargs.pop(option, None)
            if value:
                self.ssl_kwargs[option] = value

        # Save the rest of kwargs.
        self.kwargs = kwargs

        # Replace the origin dest_mapping
        self.dest_mapping = DestMapping(kwargs.get('ns_set', []),
                                        kwargs.get('ex_ns_set', []),
                                        kwargs.get('dest_mapping', {}))

        # All doc managers share one command helper.
        command_helper = CommandHelper(self.dest_mapping)
        for dm in self.doc_managers:
            dm.command_helper = command_helper

        checkpoint = self.oplog_checkpoint
        if checkpoint is None:
            # No progress file requested; nothing left to prepare.
            return

        if os.path.exists(checkpoint):
            # Only a file that allows neither writing nor reading is
            # rejected here.
            accessible = (os.access(checkpoint, os.W_OK)
                          or os.access(checkpoint, os.R_OK))
            if not accessible:
                LOG.critical("Invalid permissions on %s! Exiting" %
                             (checkpoint))
                sys.exit(2)
            return

        info_str = ("MongoConnector: Can't find %s, "
                    "attempting to create an empty progress log" %
                    checkpoint)
        LOG.warning(info_str)
        try:
            # Create oplog progress file
            open(checkpoint, "w").close()
        except IOError as e:
            LOG.critical("MongoConnector: Could not "
                         "create a progress log: %s" %
                         str(e))
            sys.exit(2)
Esempio n. 23
0
class DocManager(DocManagerBase):
    """ArangoDB implementation of the DocManager interface.

    Receives documents from an OplogThread and takes the appropriate actions on
    ArangoDB.
    """
    def __init__(self,
                 url,
                 auto_commit_interval=None,
                 chunk_size=constants.DEFAULT_MAX_BULK,
                 **kwargs):
        """Connect to ArangoDB and make sure the metadata store exists.

        Args:
            url: ArangoDB address in ``host:port`` form.
            auto_commit_interval: stored on the instance; not used here.
            chunk_size: number of documents per batch in ``bulk_upsert``.
            **kwargs: extra options, stored unchanged in ``self.kwargs``.
        """
        self.url = url
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = '_key'
        self.chunk_size = chunk_size
        self.kwargs = kwargs

        self.arango = self.create_connection()

        # Names of the database and collection holding per-document metadata
        meta_database_name = "mongodb_meta"
        self.meta_collection = "mongodb_data"

        if meta_database_name not in self.arango.databases():
            self.meta_database = self.arango.create_database(
                meta_database_name)
        else:
            self.meta_database = self.arango.db(meta_database_name)

        # The collection is checked on its own: the database may already
        # exist without it (e.g. the collection was dropped by hand), and
        # every later write goes through this collection.
        if not self.check_if_collection_exists(self.meta_database,
                                               self.meta_collection):
            self.meta_database.create_collection(self.meta_collection)

        self.command_helper = CommandHelper()

    def verify_connection(self, connection):
        """Check the connection by calling ``connection.verify()``.

        A ``ServerConnectionError`` raised by that call is replaced with a
        new ``ServerConnectionError`` whose message explains how to set up
        ArangoDB authentication.
        """
        auth_help = (
            "\nSeems that ArangoDB is running with Authentication.\n"
            "Please run the following,\n"
            "connector_arango_auth set, to set\n"
            "connector_arango_auth reset, to reset\n"
            "connector_arango_auth flush, to disable\n"
            "followed by source ~/.bashrc\n"
            "the ArangoDB authentication\n"
            "Refer readme.rst for more details")
        try:
            connection.verify()
        except ServerConnectionError:
            raise ServerConnectionError(auth_help)

    @wrap_exceptions
    def create_connection(self):
        """Build and verify an ArangoDB client for ``self.url``.

        Credentials come from the ``USER_ARANGO`` and ``PASSWD_ARANGO``
        environment variables. With neither set the client is created
        without credentials; with both set they are passed along; with
        only one of them set an ``Exception`` is raised.
        """
        host, port = self.get_host_port(self.url)
        username = os.environ.get('USER_ARANGO')
        password = os.environ.get('PASSWD_ARANGO')

        # Exactly one of the two being set is a configuration error
        if bool(username) != bool(password):
            raise Exception("Invalid credentials, ArangoDB username/"
                            "password can't be blank")

        if username:
            client = ArangoClient(host=host,
                                  port=port,
                                  username=username,
                                  password=password)
        else:
            client = ArangoClient(host=host, port=port)

        self.verify_connection(client)
        return client

    @wrap_exceptions
    def get_host_port(self, address):
        """Split a ``host:port`` address into a ``(host, port)`` tuple.

        The port is converted to ``int``. Only the first two
        colon-separated fields of ``address`` are used.
        """
        fields = address.split(':')
        return fields[0], int(fields[1])

    @wrap_exceptions
    def check_if_database_exists(self, database):
        """Return True when ``database`` is among the server's databases,
        False otherwise.
        """
        return database in self.arango.databases()

    @wrap_exceptions
    def check_if_collection_exists(self, database, coll):
        """Return True when a collection named ``coll`` is listed in
        ``database``, False otherwise.

        ``database`` may be a database handle or a database name; a name
        is resolved through ``self.arango.db``.
        """
        if isinstance(database, arango.database.Database):
            handle = database
        else:
            handle = self.arango.db(database)

        return any(entry['name'] == coll for entry in handle.collections())

    def apply_update(self, doc, update_spec):
        """Return the document that results from applying ``update_spec``.

        Specs containing ``$set`` or ``$unset`` are delegated to the base
        class implementation. Any other spec is returned as-is.
        """
        if "$set" in update_spec or "$unset" in update_spec:
            return super(DocManager, self).apply_update(doc, update_spec)
        # Don't try to add ns and _ts fields back in from doc
        return update_spec

    def _db_and_collection(self, namespace):
        """Extracts the database and collection name
        from namespace string
        """
        return namespace.split('.', 1)

    def stop(self):
        """Log a shutdown notice; nothing else is done here.

        The notice names the metadata database this DocManager creates
        in ArangoDB so an operator knows what can be dropped afterwards.
        """
        # "mongodb_meta" is the metadata database name set in __init__
        LOG.info(
            "ArangoDB DocManager Stopped: If you will not target this system "
            "again with mongo-connector then you may drop the database "
            "mongodb_meta, which holds metadata for Mongo Connector.")

    @wrap_exceptions
    def handle_command(self, doc, namespace, timestamp):
        """Apply a database- or collection-level command to ArangoDB.

        Recognised keys in ``doc`` are ``dropDatabase``,
        ``renameCollection`` (together with ``to``), ``create`` and
        ``drop``. Database and collection names are translated through
        ``self.command_helper``; a command whose namespace has no mapping
        is skipped. ``timestamp`` is accepted but not used.
        """
        db, _ = self._db_and_collection(namespace)

        if doc.get('dropDatabase'):
            for mapped_db in self.command_helper.map_db(db):
                self.arango.delete_database(mapped_db)

        if doc.get('renameCollection'):
            source_namespace = self.command_helper.map_namespace(
                doc['renameCollection'])
            target_namespace = self.command_helper.map_namespace(doc['to'])

            # Both mappings must exist before they are split: an unmapped
            # namespace comes back as None and cannot be split.
            if source_namespace and target_namespace:
                source_db, source_coll = self._db_and_collection(
                    source_namespace)
                _, target_coll = self._db_and_collection(target_namespace)
                if target_coll:
                    source = self.arango.db(source_db).collection(source_coll)
                    source.rename(target_coll)

        if doc.get('create'):
            new_db, coll = self.command_helper.map_collection(
                db, doc['create'])
            if new_db:
                # Create the *mapped* database, not the source one
                if not self.check_if_database_exists(new_db):
                    self.arango.create_database(new_db)

                if not self.check_if_collection_exists(new_db, coll):
                    self.arango.db(new_db).create_collection(coll)

        if doc.get('drop'):
            new_db, coll = self.command_helper.map_collection(db, doc['drop'])
            # Unmapped namespaces yield (None, None); nothing to drop then
            if new_db:
                self.arango.db(new_db).delete_collection(coll)

    @wrap_exceptions
    def update(self, document_id, update_spec, namespace, timestamp):
        """Apply ``update_spec`` to the stored document ``document_id``.

        An existing metadata entry for the document is replaced with one
        carrying ``timestamp``. The document is then looked up in the
        collection named by ``namespace``, run through ``apply_update``
        and written back with ``upsert``. When the document is not found
        an error is logged and no document is written.
        """
        document_id = self.pre_process_id(document_id)

        db_name, coll_name = self._db_and_collection(namespace)
        target = self.arango.db(db_name).collection(coll_name)
        meta_store = self.meta_database.collection(self.meta_collection)

        meta_cursor = meta_store.find({
            self.unique_key: document_id,
            "ns": namespace
        })
        if meta_cursor.count():
            meta_store.replace({
                self.unique_key: document_id,
                "_ts": timestamp,
                "ns": namespace
            })

        cursor = target.find({self.unique_key: document_id})
        if not cursor.count():
            # Adjacent literals keep the message free of the source
            # indentation a backslash continuation would embed in it.
            LOG.error("The document %s, which you are trying to update "
                      "is missing in ArangoDB", document_id)
            return

        updated = self.apply_update(cursor.next(), update_spec)
        # upsert() reads the identifier from "_id"
        updated['_id'] = document_id
        self.upsert(updated, namespace, timestamp)

    @wrap_exceptions
    def upsert(self, doc, namespace, timestamp):
        """Insert ``doc`` into ArangoDB, replacing a document that already
        has the same key.

        The destination collection is requested through
        ``handle_command`` before writing. ``doc`` is modified in place:
        its ``_id`` entry is removed and the processed id is stored under
        ``self.unique_key``. A metadata entry (key, timestamp, namespace)
        is inserted into, or replaced in, the meta collection.
        """
        db_name, coll_name = self._db_and_collection(namespace)

        # ask for the destination collection to be created if needed
        self.handle_command({"create": coll_name}, namespace, timestamp)

        target = self.arango.db(db_name).collection(coll_name)

        # the Mongo _id becomes the string key used by ArangoDB
        key = self.pre_process_id(doc.get('_id'))
        del doc['_id']

        meta_store = self.meta_database.collection(self.meta_collection)
        meta_entry = {self.unique_key: key, "_ts": timestamp, "ns": namespace}

        known = meta_store.find({self.unique_key: key, "ns": namespace})
        if known.count():
            meta_store.replace(meta_entry)
        else:
            meta_store.insert(meta_entry)

        # "_key" is the field ArangoDB keeps unique across the collection
        doc[self.unique_key] = key

        if target.has(key):
            target.replace(doc)
        else:
            target.insert(doc)

    def pre_process_id(self, doc_id):
        """Convert a document id into the string form used for ``_key``.

        The id is passed through ``str``; a trailing ``'.'`` is appended
        when the resulting string contains no dot.
        """
        key = str(doc_id)
        return key if '.' in key else key + '.'

    @wrap_exceptions
    def bulk_upsert(self, docs, namespace, timestamp):
        """Import ``docs`` into ArangoDB in batches of ``self.chunk_size``.

        ``docs`` may be any iterable of documents (list, generator, ...).
        Each document is modified in place: ``_id`` is removed and the
        processed id is stored under ``self.unique_key``. For every batch
        the documents are bulk-imported into the collection named by
        ``namespace`` and matching metadata entries (key, namespace,
        timestamp) into the meta collection, both with
        ``on_duplicate="replace"``.
        """
        db_name, coll_name = self._db_and_collection(namespace)

        # ask for the destination collection to be created if needed
        self.handle_command({"create": coll_name}, namespace, timestamp)

        target = self.arango.db(db_name).collection(coll_name)
        meta_store = self.meta_database.collection(self.meta_collection)

        def flush(batch, meta_batch):
            # Documents first, then their metadata entries
            target.import_bulk(documents=batch, on_duplicate="replace")
            meta_store.import_bulk(documents=meta_batch,
                                   on_duplicate="replace")

        batch, meta_batch = [], []
        # A plain loop works for non-iterator iterables too and cannot
        # spin forever on a chunk size of zero.
        for doc in docs:
            key = self.pre_process_id(doc.get('_id'))
            doc.pop('_id')
            doc.update({self.unique_key: key})
            batch.append(doc)
            meta_batch.append({
                self.unique_key: key,
                'ns': namespace,
                '_ts': timestamp
            })
            if len(batch) >= self.chunk_size:
                flush(batch, meta_batch)
                batch, meta_batch = [], []

        # Whatever is left over forms the final, shorter batch
        if batch:
            flush(batch, meta_batch)

    @wrap_exceptions
    def remove(self, document_id, namespace, timestamp):
        """Delete a document and its metadata entry from ArangoDB.

        ``document_id`` is converted with ``pre_process_id``; the entry
        with that key is deleted from the meta collection and then from
        the collection named by ``namespace``. ``timestamp`` is accepted
        but not used.
        """
        key = self.pre_process_id(document_id)

        db_name, coll_name = self._db_and_collection(namespace)
        target = self.arango.db(db_name).collection(coll_name)

        # metadata entry first, then the document itself
        self.meta_database.collection(self.meta_collection).delete(key)
        target.delete(key)

    @wrap_exceptions
    def search(self, start_ts, end_ts):
        """Yield metadata entries whose ``_ts`` lies in a time range.

        The entries come from ``find_in_range("_ts", start_ts, end_ts)``
        on the meta collection and are yielded one by one.
        """
        meta_store = self.meta_database.collection(self.meta_collection)
        matches = meta_store.find_in_range("_ts", start_ts, end_ts)

        for entry in matches:
            yield entry

    @wrap_exceptions
    def get_last_doc(self):
        """Fetch the metadata entry with the highest ``_ts`` value.

        Runs an AQL query over the meta collection sorted by ``_ts``
        descending and returns its first row, or ``None`` when the query
        yields no rows.
        """
        aql = 'FOR doc IN {} SORT doc._ts DESC LIMIT 1 RETURN doc'.format(
            self.meta_collection)
        rows = self.meta_database.aql.execute(aql)
        return next(iter(rows), None)