예제 #1
0
    def __init__(self,
                 url,
                 auto_commit_interval=None,
                 chunk_size=constants.DEFAULT_MAX_BULK,
                 **kwargs):
        """Store the settings, connect, and set up the meta database.

        Args:
            url: stored as ``self.url``.
            auto_commit_interval: stored as ``self.auto_commit_interval``.
            chunk_size: stored as ``self.chunk_size``.
            **kwargs: kept untouched in ``self.kwargs``.
        """
        self.url = url
        self.auto_commit_interval = auto_commit_interval
        self.unique_key = '_key'
        self.chunk_size = chunk_size
        self.kwargs = kwargs

        self.arango = self.create_connection()

        # Names of the meta database and of the meta collection.
        meta_db_name = "mongodb_meta"
        self.meta_database = meta_db_name
        self.meta_collection = "mongodb_data"

        # Reuse the meta database when it is already listed; otherwise
        # create it together with the meta collection.  After this point
        # ``self.meta_database`` holds whatever ``db()`` /
        # ``create_database()`` returned rather than the name.
        # NOTE(review): the collection is only created alongside a new
        # database -- confirm an existing database always contains it.
        if meta_db_name in self.arango.databases():
            self.meta_database = self.arango.db(meta_db_name)
        else:
            self.meta_database = self.arango.create_database(meta_db_name)
            self.meta_database.create_collection(self.meta_collection)

        self.command_helper = CommandHelper()
예제 #2
0
    def test_commands(self):
        """Run create, drop and dropDatabase commands through the manager.

        Documents are upserted and committed between the create and the
        drop so the test can check that they disappear afterwards.
        """
        cmd_args = ("test.$cmd", 1)
        self.elastic_doc.command_helper = CommandHelper()

        def refresh_indices():
            # Refresh all indices, going through retry_until_ok.
            retry_until_ok(self.elastic_conn.indices.refresh, index="")

        def search_test2():
            # Everything of doc type "test2" found in index "test".
            match_all = {"query": {"match_all": {}}}
            return list(
                self.elastic_doc._stream_search(index="test",
                                                doc_type="test2",
                                                body=match_all))

        self.elastic_doc.handle_command({"create": "test2"}, *cmd_args)
        refresh_indices()
        self.assertIn("test2", self._mappings("test"))

        docs = [{"_id": doc_id, "name": name}
                for doc_id, name in enumerate(("ted", "marsha", "nikolas"))]
        for doc in docs:
            self.elastic_doc.upsert(doc, "test.test2", 1)

        # Commit upserted docs as they are in buffer
        self.elastic_doc.commit()

        found = search_test2()
        for doc in docs:
            self.assertTrue(doc in found)

        self.elastic_doc.handle_command({"drop": "test2"}, *cmd_args)
        refresh_indices()
        self.assertEqual(0, len(search_test2()))

        for collection in ("test2", "test3"):
            self.elastic_doc.handle_command({"create": collection},
                                            *cmd_args)
        refresh_indices()
        self.elastic_doc.handle_command({"dropDatabase": 1}, *cmd_args)
        refresh_indices()
        self.assertNotIn("test", self._indices())
        self.assertNotIn("test2", self._mappings())
        self.assertNotIn("test3", self._mappings())
예제 #3
0
 def initOplogThread(self, namespace_set=None, dest_mapping=None):
     """Create a command-logging doc manager and start an OplogThread.

     Args:
         namespace_set: namespaces handed to CommandHelper and
             OplogThread; a fresh empty list is used when omitted.
         dest_mapping: mapping handed to CommandHelper and OplogThread;
             a fresh empty dict is used when omitted.

     The defaults are ``None`` rather than ``[]`` / ``{}`` so that no
     list or dict object is shared between separate calls.
     """
     if namespace_set is None:
         namespace_set = []
     if dest_mapping is None:
         dest_mapping = {}

     self.docman = CommandLoggerDocManager()
     self.docman.command_helper = CommandHelper(namespace_set, dest_mapping)
     self.opman = OplogThread(primary_client=self.primary_conn,
                              doc_managers=(self.docman, ),
                              oplog_progress_dict=self.oplog_progress,
                              namespace_set=namespace_set,
                              dest_mapping=dest_mapping,
                              collection_dump=False)
     self.opman.start()
예제 #4
0
    def initOplogThread(self, namespace_set=None):
        """Create a command-logging doc manager and start an OplogThread.

        The same NamespaceConfig, built from ``namespace_set``, is given
        to both the doc manager's CommandHelper and the OplogThread.
        """
        docman = CommandLoggerDocManager()
        self.docman = docman
        ns_config = NamespaceConfig(namespace_set=namespace_set)
        docman.command_helper = CommandHelper(ns_config)

        oplog_thread = OplogThread(
            primary_client=self.primary_conn,
            doc_managers=(docman, ),
            oplog_progress_dict=self.oplog_progress,
            namespace_config=ns_config,
            collection_dump=False)
        self.opman = oplog_thread
        oplog_thread.start()
    def test_commands(self):
        """Run create, drop and dropDatabase commands through the manager.

        Fixed sleeps separate each command from the assertions that
        follow it.
        """
        cmd_args = ('test.$cmd', 1)
        self.elastic_doc.command_helper = CommandHelper()

        def search_test2():
            # Everything of doc type 'test2' found in index "test".
            match_all = {"query": {"match_all": {}}}
            return list(
                self.elastic_doc._stream_search(index="test",
                                                doc_type='test2',
                                                body=match_all))

        self.elastic_doc.handle_command({'create': 'test2'}, *cmd_args)
        time.sleep(1)
        self.assertIn('test2', self._mappings('test'))

        docs = [{"_id": doc_id, "name": name}
                for doc_id, name in enumerate(("ted", "marsha", "nikolas"))]
        for doc in docs:
            self.elastic_doc.upsert(doc, 'test.test2', 1)
        found = search_test2()
        for doc in docs:
            self.assertTrue(doc in found)

        self.elastic_doc.handle_command({'drop': 'test2'}, *cmd_args)
        time.sleep(3)
        self.assertEqual(0, len(search_test2()))

        for collection in ('test2', 'test3'):
            self.elastic_doc.handle_command({'create': collection},
                                            *cmd_args)
        time.sleep(1)
        self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
        time.sleep(1)
        self.assertNotIn('test', self._indices())
        self.assertNotIn('test2', self._mappings())
        self.assertNotIn('test3', self._mappings())
예제 #6
0
    def test_commands(self):
        """Run commands through a doc manager that uses namespace mapping.

        Note that mongo-connector does not currently support commands
        after renaming a database.
        """
        renames = {
            'test.test': 'test.othertest',
            'test.drop': 'dropped.collection'
        }
        namespace_config = NamespaceConfig(
            namespace_set=['test.test', 'test.test2', 'test.drop'],
            namespace_options=renames)
        self.choosy_docman.command_helper = CommandHelper(namespace_config)

        def run(command, namespace='test.$cmd'):
            self.choosy_docman.handle_command(command, namespace, 1)

        def collections_in_test():
            return self.mongo_conn['test'].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        try:
            self.choosy_docman.handle_command({'create': 'test'}, *TESTARGS)
            self.assertIn('othertest', collections_in_test())

            run({'renameCollection': 'test.test', 'to': 'test.test2'},
                'admin.$cmd')
            self.assertNotIn('othertest', collections_in_test())
            self.assertIn('test2', collections_in_test())

            run({'drop': 'test2'})
            self.assertNotIn('test2', collections_in_test())

            # WiredTiger drops the database when the last collection is
            # dropped, so recreate it before testing dropDatabase.
            if 'test' not in databases():
                self.choosy_docman.handle_command({'create': 'test'},
                                                  *TESTARGS)
            self.assertIn('test', databases())
            run({'dropDatabase': 1})
            self.assertNotIn('test', databases())

            # Briefly test mapped database name with dropDatabase command.
            self.mongo_conn.dropped.collection.insert_one({'a': 1})
            self.assertIn('dropped', databases())
            run({'dropDatabase': 1})
            self.assertNotIn('dropped', databases())
        finally:
            self.mongo_conn.drop_database('test')
    def test_command_helper(self):
        """Check CommandHelper's database, namespace and collection maps."""
        mapping = {"a.x": "b.x", "a.y": "c.y"}
        included = list(mapping) + ["a.z"]
        namespace_config = NamespaceConfig(namespace_set=included,
                                           namespace_options=mapping)
        helper = CommandHelper(namespace_config)

        self.assertEqual(set(helper.map_db("a")), {"a", "b", "c"})
        self.assertEqual(helper.map_db("d"), [])

        # (source namespace, expected target)
        namespace_cases = (("a.x", "b.x"), ("a.z", "a.z"), ("d.x", None))
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        # (source db, source collection, expected (db, collection))
        collection_cases = (
            ("a", "x", ("b", "x")),
            ("a", "z", ("a", "z")),
            ("d", "x", (None, None)),
        )
        for db, coll, expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
예제 #8
0
    def initOplogThread(self,
                        namespace_set=None,
                        ex_namespace_set=None,
                        dest_mapping=None):
        """Create a command-logging doc manager and start an OplogThread.

        Args:
            namespace_set: passed to DestMapping and, as ``ns_set``, to
                OplogThread; a fresh empty list is used when omitted.
            ex_namespace_set: passed to DestMapping and, as
                ``ex_ns_set``, to OplogThread; a fresh empty list is
                used when omitted.
            dest_mapping: passed to DestMapping; a fresh empty dict is
                used when omitted.

        The defaults are ``None`` rather than ``[]`` / ``{}`` so that no
        list or dict object is shared between separate calls.
        """
        if namespace_set is None:
            namespace_set = []
        if ex_namespace_set is None:
            ex_namespace_set = []
        if dest_mapping is None:
            dest_mapping = {}

        self.docman = CommandLoggerDocManager()
        # Replace the origin dest_mapping
        self.dest_mapping_stru = DestMapping(namespace_set, ex_namespace_set,
                                             dest_mapping)

        self.docman.command_helper = CommandHelper(self.dest_mapping_stru)
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(self.docman, ),
                                 oplog_progress_dict=self.oplog_progress,
                                 dest_mapping_stru=self.dest_mapping_stru,
                                 ns_set=namespace_set,
                                 ex_ns_set=ex_namespace_set,
                                 collection_dump=False)
        self.opman.start()
예제 #9
0
    def test_command_helper(self):
        """Check CommandHelper's database, namespace and collection maps."""
        mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
        included = list(mapping) + ['a.z']
        namespace_config = NamespaceConfig(namespace_set=included,
                                           namespace_options=mapping)
        helper = CommandHelper(namespace_config)

        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        # (source namespace, expected target)
        namespace_cases = (('a.x', 'b.x'), ('a.z', 'a.z'), ('d.x', None))
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        # (source db, source collection, expected (db, collection))
        collection_cases = (
            ('a', 'x', ('b', 'x')),
            ('a', 'z', ('a', 'z')),
            ('d', 'x', (None, None)),
        )
        for db, coll, expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
예제 #10
0
    def test_command_helper(self):
        """Check CommandHelper's maps when it is built from a DestMapping."""
        mapping = {'a.x': 'b.x', 'a.y': 'c.y'}

        # Replace the origin dest_mapping
        included = list(mapping) + ['a.z']
        dest_mapping_stru = DestMapping(included, [], mapping)
        helper = CommandHelper(dest_mapping_stru)

        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        # (source namespace, expected target)
        namespace_cases = (('a.x', 'b.x'), ('a.z', 'a.z'), ('d.x', None))
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        # (source db, source collection, expected (db, collection))
        collection_cases = (
            ('a', 'x', ('b', 'x')),
            ('a', 'z', ('a', 'z')),
            ('d', 'x', (None, None)),
        )
        for db, coll, expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
    def test_commands(self):
        """Check that drop and dropDatabase remove the indexed documents."""
        self.docman.command_helper = CommandHelper()

        def count_ns(ns):
            # Number of hits returned by a search on the given namespace.
            return len(list(self._search("ns:%s" % ns)))

        payload = 'data'

        self.docman.upsert({'_id': '1', 'test': payload}, *TESTARGS)
        self.assertEqual(count_ns("test.test"), 1)

        self.docman.handle_command({'drop': 'test'}, *TESTARGS)
        time.sleep(1)
        self.assertEqual(count_ns("test.test"), 0)

        # Two more collections, each holding one document.
        for doc_id, namespace in (('2', 'test.test2'), ('3', 'test.test3')):
            self.docman.upsert({'_id': doc_id, 'test': payload},
                               namespace, doc_id)
        self.docman.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
        time.sleep(1)
        for namespace in ("test.test2", "test.test3"):
            self.assertEqual(count_ns(namespace), 0)
예제 #12
0
    def buffer_and_drop(self):
        """Insert document and drop collection while doc is in buffer"""
        doc_manager = self.elastic_doc
        doc_manager.command_helper = CommandHelper()
        doc_manager.auto_commit_interval = None

        index = "test3"
        doc_type = "foo"
        cmd_args = ('%s.%s' % (index, doc_type), 1)

        def refresh_indices():
            # Refresh all indices, going through retry_until_ok.
            retry_until_ok(self.elastic_conn.indices.refresh, index="")

        def assert_buffer_empty():
            self.assertFalse(self.elastic_doc.BulkBuffer.get_buffer())

        doc_id = 1
        doc = {"_id": doc_id, "name": "bar"}
        self.elastic_doc.upsert(doc, *cmd_args)

        self.elastic_doc.handle_command({'drop': doc_type}, *cmd_args)
        refresh_indices()

        # Commit should be called before command has been handled
        # Which means that buffer should be empty
        assert_buffer_empty()

        # After drop, below search should return no results
        hits = list(
            self.elastic_doc._stream_search(
                index=index,
                doc_type=doc_type,
                body={"query": {"match_all": {}}}))
        self.assertFalse(hits)

        # Test dropDatabase as well: add the document to the database
        # again, and this time update it too before dropping.
        self.elastic_doc.upsert(doc, *cmd_args)
        self.elastic_doc.update(doc_id, {"$set": {"name": "foo2"}},
                                *cmd_args)
        self.elastic_doc.handle_command({'dropDatabase': 1}, *cmd_args)
        refresh_indices()
        assert_buffer_empty()
        self.assertNotIn(index, self._mappings())

        # set auto_commit_interval back to 0
        self.elastic_doc.auto_commit_interval = 0
예제 #13
0
    def test_command_helper(self):
        """Check CommandHelper's validation and its mapping lookups."""
        # Databases cannot be merged
        merging = {'a.x': 'c.x', 'b.x': 'c.y'}
        with self.assertRaises(errors.MongoConnectorError):
            CommandHelper(list(merging), merging)

        mapping = {'a.x': 'b.x', 'a.y': 'c.y'}
        included = list(mapping) + ['a.z']
        helper = CommandHelper(included, mapping)

        self.assertEqual(set(helper.map_db('a')), {'a', 'b', 'c'})
        self.assertEqual(helper.map_db('d'), [])

        # (source namespace, expected target)
        namespace_cases = (('a.x', 'b.x'), ('a.z', 'a.z'), ('d.x', None))
        for namespace, expected in namespace_cases:
            self.assertEqual(helper.map_namespace(namespace), expected)

        # (source db, source collection, expected (db, collection))
        collection_cases = (
            ('a', 'x', ('b', 'x')),
            ('a', 'z', ('a', 'z')),
            ('d', 'x', (None, None)),
        )
        for db, coll, expected in collection_cases:
            self.assertEqual(helper.map_collection(db, coll), expected)
    def test_commands(self):
        """Run create, rename, drop and dropDatabase via the doc manager."""
        self.mongo_doc.command_helper = CommandHelper()

        def collections_in_test():
            return self.mongo_conn['test'].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        # Create the collection and check that it shows up.
        self.mongo_doc.handle_command({'create': 'test'}, *TESTARGS)
        self.assertIn('test', collections_in_test())

        # Rename it: the old name goes away, the new one appears.
        rename = {'renameCollection': 'test.test', 'to': 'test.test2'}
        self.mongo_doc.handle_command(rename, 'admin.$cmd', 1)
        self.assertNotIn('test', collections_in_test())
        self.assertIn('test2', collections_in_test())

        self.mongo_doc.handle_command({'drop': 'test2'}, 'test.$cmd', 1)
        self.assertNotIn('test2', collections_in_test())

        self.assertIn('test', databases())
        self.mongo_doc.handle_command({'dropDatabase': 1}, 'test.$cmd', 1)
        self.assertNotIn('test', databases())
예제 #15
0
    def test_commands(self):
        """Check create, drop and dropDatabase against the index mappings.

        Fixed one-second sleeps separate each command from the
        assertions that follow it.
        """
        cmd_args = ('test.$cmd', 1)
        self.elastic_doc.command_helper = CommandHelper()

        def run(command):
            self.elastic_doc.handle_command(command, *cmd_args)

        run({'create': 'test2'})
        time.sleep(1)
        self.assertIn('test2', self._mappings('test'))

        run({'drop': 'test2'})
        time.sleep(1)
        self.assertNotIn('test2', self._mappings('test'))

        for collection in ('test2', 'test3'):
            run({'create': collection})
        time.sleep(1)
        run({'dropDatabase': 1})
        time.sleep(1)
        self.assertNotIn('test', self._indices())
        for collection in ('test2', 'test3'):
            self.assertNotIn(collection, self._mappings())
예제 #16
0
    def test_commands(self):
        """Run commands through a doc manager that uses namespace mapping.

        Note that mongo-connector does not currently support commands
        after renaming a database.
        """
        renames = {
            "test.test": "test.othertest",
            "test.drop": "dropped.collection",
        }
        namespace_config = NamespaceConfig(
            namespace_set=["test.test", "test.test2", "test.drop"],
            namespace_options=renames,
        )
        self.choosy_docman.command_helper = CommandHelper(namespace_config)

        def run(command, namespace="test.$cmd"):
            self.choosy_docman.handle_command(command, namespace, 1)

        def collections_in_test():
            return self.mongo_conn["test"].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        try:
            self.choosy_docman.handle_command({"create": "test"}, *TESTARGS)
            self.assertIn("othertest", collections_in_test())

            run({"renameCollection": "test.test", "to": "test.test2"},
                "admin.$cmd")
            self.assertNotIn("othertest", collections_in_test())
            self.assertIn("test2", collections_in_test())

            run({"drop": "test2"})
            self.assertNotIn("test2", collections_in_test())

            # WiredTiger drops the database when the last collection is
            # dropped, so recreate it before testing dropDatabase.
            if "test" not in databases():
                self.choosy_docman.handle_command({"create": "test"},
                                                  *TESTARGS)
            self.assertIn("test", databases())
            run({"dropDatabase": 1})
            self.assertNotIn("test", databases())

            # Briefly test mapped database name with dropDatabase command.
            self.mongo_conn.dropped.collection.insert_one({"a": 1})
            self.assertIn("dropped", databases())
            run({"dropDatabase": 1})
            self.assertNotIn("dropped", databases())
        finally:
            self.mongo_conn.drop_database("test")
예제 #17
0
    def test_commands(self):
        """Run commands through a doc manager that uses namespace mapping.

        Note that mongo-connector does not currently support commands
        after renaming a database.
        """
        renames = {
            'test.test': 'test.othertest',
            'test.drop': 'dropped.collection'
        }
        self.mongo_doc.command_helper = CommandHelper(
            namespace_set=['test.test', 'test.test2', 'test.drop'],
            dest_mapping=renames)

        def run(command, namespace='test.$cmd'):
            self.mongo_doc.handle_command(command, namespace, 1)

        def collections_in_test():
            return self.mongo_conn['test'].collection_names()

        def databases():
            return self.mongo_conn.database_names()

        try:
            self.mongo_doc.handle_command({'create': 'test'}, *TESTARGS)
            self.assertIn('othertest', collections_in_test())

            run({'renameCollection': 'test.test', 'to': 'test.test2'},
                'admin.$cmd')
            self.assertNotIn('othertest', collections_in_test())
            self.assertIn('test2', collections_in_test())

            run({'drop': 'test2'})
            self.assertNotIn('test2', collections_in_test())

            self.assertIn('test', databases())
            run({'dropDatabase': 1})
            self.assertNotIn('test', databases())

            # Briefly test mapped database name with dropDatabase command.
            self.mongo_conn.dropped.collection.insert({'a': 1})
            self.assertIn('dropped', databases())
            run({'dropDatabase': 1})
            self.assertNotIn('dropped', databases())
        finally:
            self.mongo_conn.drop_database('test')
예제 #18
0
    def __init__(self, mongo_address, doc_managers=None, **kwargs):
        """Set up connector state and validate the oplog progress file.

        Args:
            mongo_address: main address, stored as ``self.address``.
            doc_managers: DocManager instances to use; when falsy, a
                single ``simulator.DocManager`` is used instead.
            **kwargs: ``auth_key``, ``auth_username``,
                ``oplog_checkpoint`` and the ``ssl_*`` options are
                popped into attributes.  ``tz_aware``, ``ns_set``,
                ``ex_ns_set`` and ``dest_mapping`` are read but left in
                place.  Whatever remains is stored as ``self.kwargs``.

        Exits the process with status 2 when the progress file cannot be
        created, or when an existing progress file is not both readable
        and writable.
        """
        super(Connector, self).__init__()

        # can_run is set to false when we join the thread
        self.can_run = True

        # The signal that caused the connector to stop or None
        self.signal = None

        # main address - either mongos for sharded setups or a primary otherwise
        self.address = mongo_address

        # connection to the main address
        self.main_conn = None

        # List of DocManager instances
        if doc_managers:
            self.doc_managers = doc_managers
        else:
            LOG.warning('No doc managers specified, using simulator.')
            self.doc_managers = (simulator.DocManager(),)

        # Password for authentication
        self.auth_key = kwargs.pop('auth_key', None)

        # Username for authentication
        self.auth_username = kwargs.pop('auth_username', None)

        # The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = kwargs.pop('oplog_checkpoint',
                                           'oplog.timestamp')

        # The set of OplogThreads created
        self.shard_set = {}

        # Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        # Timezone awareness
        self.tz_aware = kwargs.get('tz_aware', False)

        # SSL keyword arguments to MongoClient: every option is removed
        # from kwargs, but only the truthy ones are kept.
        self.ssl_kwargs = {}
        for option in ('ssl_certfile', 'ssl_ca_certs',
                       'ssl_keyfile', 'ssl_cert_reqs'):
            value = kwargs.pop(option, None)
            if value:
                self.ssl_kwargs[option] = value

        # Save the rest of kwargs.
        self.kwargs = kwargs

        # Replace the origin dest_mapping
        self.dest_mapping = DestMapping(kwargs.get('ns_set', []),
                                        kwargs.get('ex_ns_set', []),
                                        kwargs.get('dest_mapping', {}))

        # Initialize and set the command helper (one shared instance)
        command_helper = CommandHelper(self.dest_mapping)
        for dm in self.doc_managers:
            dm.command_helper = command_helper

        checkpoint = self.oplog_checkpoint
        if checkpoint is not None:
            if not os.path.exists(checkpoint):
                LOG.warning("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log",
                            checkpoint)
                try:
                    # Create oplog progress file
                    with open(checkpoint, "w"):
                        pass
                except IOError as e:
                    LOG.critical("MongoConnector: Could not "
                                 "create a progress log: %s", str(e))
                    sys.exit(2)
            elif not (os.access(checkpoint, os.R_OK)
                      and os.access(checkpoint, os.W_OK)):
                # Both permissions are required.  The previous check only
                # failed when the file was neither readable nor writable,
                # so a file missing just one permission slipped through.
                LOG.critical("Invalid permissions on %s! Exiting",
                             checkpoint)
                sys.exit(2)