def test_upgrade_oplog_progress(self):
     first_oplog_ts1 = self.opman1.oplog.find_one()['ts']
     first_oplog_ts2 = self.opman2.oplog.find_one()['ts']
     # Old format oplog progress file:
     progress = {
         str(self.opman1.oplog): bson_ts_to_long(first_oplog_ts1),
         str(self.opman2.oplog): bson_ts_to_long(first_oplog_ts2)
     }
     # Set up oplog managers to use the old format.
     oplog_progress = LockingDict()
     oplog_progress.dict = progress
     self.opman1.oplog_progress = oplog_progress
     self.opman2.oplog_progress = oplog_progress
     # Cause the oplog managers to update their checkpoints.
     self.opman1.update_checkpoint(first_oplog_ts1)
     self.opman2.update_checkpoint(first_oplog_ts2)
     # New format should be in place now.
     new_format = {
         self.opman1.replset_name: first_oplog_ts1,
         self.opman2.replset_name: first_oplog_ts2
     }
     self.assertEqual(
         new_format,
         self.opman1.oplog_progress.get_dict()
     )
     self.assertEqual(
         new_format,
         self.opman2.oplog_progress.get_dict()
     )
 def test_upgrade_oplog_progress(self):
     first_oplog_ts1 = self.opman1.oplog.find_one()['ts']
     first_oplog_ts2 = self.opman2.oplog.find_one()['ts']
     # Old format oplog progress file:
     progress = {
         str(self.opman1.oplog): bson_ts_to_long(first_oplog_ts1),
         str(self.opman2.oplog): bson_ts_to_long(first_oplog_ts2)
     }
     # Set up oplog managers to use the old format.
     oplog_progress = LockingDict()
     oplog_progress.dict = progress
     self.opman1.oplog_progress = oplog_progress
     self.opman2.oplog_progress = oplog_progress
     # Cause the oplog managers to update their checkpoints.
     self.opman1.update_checkpoint(first_oplog_ts1)
     self.opman2.update_checkpoint(first_oplog_ts2)
     # New format should be in place now.
     new_format = {
         self.opman1.replset_name: first_oplog_ts1,
         self.opman2.replset_name: first_oplog_ts2
     }
     self.assertEqual(
         new_format,
         self.opman1.oplog_progress.get_dict()
     )
     self.assertEqual(
         new_format,
         self.opman2.oplog_progress.get_dict()
     )
Example #3
0
    def test_init_cursor(self):
        """Test init_cursor in oplog_manager. Assertion failure if it
            doesn't pass
        """

        test_oplog, primary_conn, search_ts = self.get_oplog_thread()
        test_oplog.checkpoint = None  # needed for these tests

        # initial tests with no config file and empty oplog
        self.assertEqual(test_oplog.init_cursor(), None)

        # no config, single oplog entry
        primary_conn['test']['test'].insert({'name': 'paulie'})
        search_ts = test_oplog.get_last_oplog_timestamp()
        cursor = test_oplog.init_cursor()
        self.assertEqual(cursor.count(), 1)
        self.assertEqual(test_oplog.checkpoint, search_ts)

        # with config file, assert that size != 0
        os.system('touch temp_config.txt')

        cursor = test_oplog.init_cursor()
        oplog_dict = test_oplog.oplog_progress.get_dict()

        self.assertEqual(cursor.count(), 1)
        self.assertTrue(str(test_oplog.oplog) in oplog_dict)
        self.assertTrue(
            oplog_dict[str(test_oplog.oplog)] == test_oplog.checkpoint)

        os.system('rm temp_config.txt')

        # test init_cursor when OplogThread created with/without no-dump option
        # insert some documents (will need to be dumped)
        primary_conn['test']['test'].remove()
        primary_conn['test']['test'].insert(({"_id": i} for i in range(100)))

        # test no-dump option
        docman = test_oplog.doc_managers[0]
        docman._delete()
        test_oplog.collection_dump = False
        test_oplog.oplog_progress = LockingDict()
        # init_cursor has the side-effect of causing a collection dump
        test_oplog.init_cursor()
        self.assertEqual(len(docman._search()), 0)

        # test w/o no-dump option
        docman._delete()
        test_oplog.collection_dump = True
        test_oplog.oplog_progress = LockingDict()
        test_oplog.init_cursor()
        self.assertEqual(len(docman._search()), 100)
    def get_oplog_thread(cls):
        """ Set up connection with mongo. Returns oplog, the connection and
            oplog collection

            This function clears the oplog
        """
        is_sharded = True
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        primary_conn['test']['test'].drop()
        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])

        if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
            mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
            is_sharded = False
        oplog_coll = primary_conn['local']['oplog.rs']
        oplog_coll.drop()           # reset the oplog

        primary_conn['local'].create_collection('oplog.rs', capped=True,
                                                size=1000000)
        namespace_set = ['test.test']
        doc_manager = DocManager()
        oplog = OplogThread(primary_conn, mongos_addr, oplog_coll, is_sharded,
                            doc_manager, LockingDict(),
                            namespace_set, cls.AUTH_KEY, AUTH_USERNAME,
                            repl_set="demo-repl")

        return(oplog, primary_conn, oplog_coll)
    def setUp(self):
        # Create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Start a replica set
        self.repl_set = ReplicaSet().start()
        # Connection to the replica set as a whole
        self.main_conn = self.repl_set.client()
        # Connection to the primary specifically
        self.primary_conn = self.repl_set.primary.client()
        # Connection to the secondary specifically
        self.secondary_conn = self.repl_set.secondary.client(
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman = OplogThread(primary_client=self.main_conn,
                                 doc_managers=(doc_manager, ),
                                 oplog_progress_dict=oplog_progress,
                                 ns_set=["test.mc"])
Example #6
0
    def get_oplog_thread(cls):
        """ Set up connection with mongo.

        Returns oplog, the connection and oplog collection.
        This function clears the oplog.
        """
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
        mongos = Connection(mongos_addr)
        mongos['alpha']['foo'].drop()

        oplog_coll = primary_conn['local']['oplog.rs']
        oplog_coll.drop()  # reset the oplog

        primary_conn['local'].create_collection('oplog.rs',
                                                capped=True,
                                                size=1000000)
        namespace_set = ['test.test', 'alpha.foo']
        doc_manager = DocManager()
        oplog = OplogThread(primary_conn,
                            mongos_addr, oplog_coll, True, doc_manager,
                            LockingDict(), namespace_set, cls.AUTH_KEY,
                            AUTH_USERNAME)

        return (oplog, primary_conn, oplog_coll, mongos)
Example #7
0
    def get_new_oplog(cls):
        """ Set up connection with mongo. Returns oplog, the connection and
            oplog collection

            This function does not clear the oplog
        """
        is_sharded = True
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
        if PORTS_ONE["MAIN"] == PORTS_ONE["PRIMARY"]:
            mongos_addr = "%s:%s" % (HOSTNAME, PORTS_ONE['MAIN'])
            is_sharded = False
        oplog_coll = primary_conn['local']['oplog.rs']

        namespace_set = ['test.test']
        doc_manager = DocManager()
        oplog = OplogThread(primary_conn=primary_conn,
                            main_address=mongos_addr,
                            oplog_coll=oplog_coll,
                            is_sharded=is_sharded,
                            doc_manager=doc_manager,
                            oplog_progress_dict=LockingDict(),
                            namespace_set=namespace_set,
                            auth_key=cls.AUTH_KEY,
                            auth_username=AUTH_USERNAME,
                            repl_set="demo-repl")
        return (oplog, primary_conn, oplog.main_connection, oplog_coll)
Example #8
0
    def test_skipped_oplog_entry_updates_checkpoint(self):
        repl_set = ReplicaSetSingle().start()
        conn = repl_set.client()
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(namespace_set=["test.test"]),
        )
        opman.start()

        # Insert a document into an included collection
        conn["test"]["test"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        assert_soon(
            lambda: last_ts == opman.checkpoint,
            "OplogThread never updated checkpoint to non-skipped " "entry.",
        )
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)

        # Make sure that the oplog thread updates its checkpoint on every
        # oplog entry.
        conn["test"]["ignored"].insert_one({"test": 1})
        last_ts = opman.get_last_oplog_timestamp()
        assert_soon(
            lambda: last_ts == opman.checkpoint,
            "OplogThread never updated checkpoint to skipped entry.",
        )
        opman.join()
        conn.close()
        repl_set.stop()
 def setUp(self):
     self.repl_set = ReplicaSet().start()
     self.primary_conn = self.repl_set.client()
     self.oplog_coll = self.primary_conn.local['oplog.rs']
     self.opman = OplogThread(primary_client=self.primary_conn,
                              doc_managers=(DocManager(), ),
                              oplog_progress_dict=LockingDict())
 def setUp(self):
     self.namespace_config = NamespaceConfig()
     self.opman = OplogThread(
         primary_client=self.primary_conn,
         doc_managers=(DocManager(), ),
         oplog_progress_dict=LockingDict(),
         namespace_config=self.namespace_config,
     )
Example #11
0
 def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
     self.namespace_config = NamespaceConfig(namespace_set=include_ns,
                                             ex_namespace_set=exclude_ns,
                                             namespace_options=dest_mapping)
     self.opman = OplogThread(primary_client=self.primary_conn,
                              doc_managers=(DocManager(), ),
                              oplog_progress_dict=LockingDict(),
                              namespace_config=self.namespace_config)
Example #12
0
    def test_fields_and_exclude(self):
        fields = ['a', 'b', 'c', '_id']
        exclude_fields = ['x', 'y', 'z']

        # Test setting both to None in constructor
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=None,
                            exclude_fields=None)
        self._check_fields(opman, [], [], None)
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=None,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        # Test setting fields when exclude_fields is set
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "fields", None)
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=None,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", exclude_fields)
        self.assertRaises(errors.InvalidConfiguration, setattr, opman,
                          "exclude_fields", None)
        self.assertRaises(errors.InvalidConfiguration,
                          OplogThread,
                          self.primary_conn, (DocManager(), ),
                          LockingDict(),
                          self.dest_mapping_stru,
                          fields=fields,
                          exclude_fields=exclude_fields)
Example #13
0
 def setUp(self):
     self.repl_set = ReplicaSetSingle().start()
     self.primary_conn = self.repl_set.client()
     self.oplog_coll = self.primary_conn.local['oplog.rs']
     self.dest_mapping_stru = DestMapping([], [], {})
     self.opman = OplogThread(
         primary_client=self.primary_conn,
         doc_managers=(DocManager(),),
         oplog_progress_dict=LockingDict(),
         dest_mapping_stru=self.dest_mapping_stru,
     )
Example #14
0
 def setUp(self):
     self.repl_set = ReplicaSetSingle().start()
     self.primary_conn = self.repl_set.client()
     self.oplog_coll = self.primary_conn.local["oplog.rs"]
     self.opman = OplogThread(
         primary_client=self.primary_conn,
         doc_managers=(DocManager(),),
         oplog_progress_dict=LockingDict(),
         namespace_config=NamespaceConfig(
             namespace_options={"test.*": True, "gridfs.*": {"gridfs": True}}
         ),
     )
Example #15
0
    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog, with gridfs collections
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog with gridfs collections
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        # Insert 10 gridfs files
        for i in range(10):
            fs = gridfs.GridFS(self.primary_conn["gridfs"],
                               collection="test" + str(i))
            fs.put(b"hello world")
        # Insert 1000 documents
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1010)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            namespace_config=NamespaceConfig(namespace_set=["test.test"]),
        )
        # Insert a document into an included collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-included collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many(
                [{"test": "1" * 1024} for _ in range(1024)])
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()
Example #16
0
    def test_dump_collection(self):
        """Test the dump_collection method

        Cases:

        1. empty oplog
        2. non-empty oplog
        3. non-empty oplog, specified a namespace-set, none of the oplog
           entries are for collections in the namespace-set
        """

        # Test with empty oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        last_ts = self.opman.dump_collection()
        self.assertEqual(last_ts, None)

        # Test with non-empty oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        for i in range(1000):
            self.primary_conn["test"]["test"].insert_one({
                "i": i + 500
            })
        last_ts = self.opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, self.opman.dump_collection())
        self.assertEqual(len(self.opman.doc_managers[0]._search()), 1000)

        # Case 3
        # 1MB oplog so that we can rollover quickly
        repl_set = ReplicaSetSingle(oplogSize=1).start()
        conn = repl_set.client()
        dest_mapping_stru = DestMapping(["test.test"], [], {})
        opman = OplogThread(
            primary_client=conn,
            doc_managers=(DocManager(),),
            oplog_progress_dict=LockingDict(),
            dest_mapping_stru=dest_mapping_stru,
            ns_set=set(["test.test"])
        )
        # Insert a document into a ns_set collection
        conn["test"]["test"].insert_one({"test": 1})
        # Cause the oplog to rollover on a non-ns_set collection
        while conn["local"]["oplog.rs"].find_one({"ns": "test.test"}):
            conn["test"]["ignored"].insert_many(
                [{"test": "1" * 1024} for _ in range(1024)])
        last_ts = opman.get_last_oplog_timestamp()
        self.assertEqual(last_ts, opman.dump_collection())
        self.assertEqual(len(opman.doc_managers[0]._search()), 1)
        conn.close()
        repl_set.stop()
Example #17
0
 def setUp(self):
     _, _, self.primary_p = start_replica_set('test-oplog-manager')
     self.primary_conn = pymongo.MongoClient(mongo_host, self.primary_p)
     self.oplog_coll = self.primary_conn.local['oplog.rs']
     self.opman = OplogThread(primary_conn=self.primary_conn,
                              main_address='%s:%d' %
                              (mongo_host, self.primary_p),
                              oplog_coll=self.oplog_coll,
                              is_sharded=False,
                              doc_manager=DocManager(),
                              oplog_progress_dict=LockingDict(),
                              namespace_set=None,
                              auth_key=None,
                              auth_username=None,
                              repl_set='test-oplog-manager')
Example #18
0
    def reset_opman(self, include_ns=None, exclude_ns=None, dest_mapping=None):
        if include_ns is None:
            include_ns = []
        if exclude_ns is None:
            exclude_ns = []
        if dest_mapping is None:
            dest_mapping = {}

        # include_ns must not exist together with exclude_ns
        # dest_mapping must exist together with include_ns
        # those checks have been tested in test_config.py so we skip that here.

        self.dest_mapping_stru = DestMapping(include_ns, exclude_ns,
                                             dest_mapping)
        self.opman = OplogThread(primary_client=self.primary_conn,
                                 doc_managers=(DocManager(), ),
                                 oplog_progress_dict=LockingDict(),
                                 dest_mapping_stru=self.dest_mapping_stru,
                                 ns_set=include_ns,
                                 ex_ns_set=exclude_ns)
Example #19
0
    def get_new_oplog(cls):
        """ Set up connection with mongo.

        Returns oplog, the connection and oplog collection
        This function does not clear the oplog
        """
        primary_conn = Connection(HOSTNAME, int(PORTS_ONE["PRIMARY"]))
        if primary_conn['admin'].command("isMaster")['ismaster'] is False:
            primary_conn = Connection(HOSTNAME, int(PORTS_ONE["SECONDARY"]))

        mongos = "%s:%s" % (HOSTNAME, PORTS_ONE["MONGOS"])
        oplog_coll = primary_conn['local']['oplog.rs']

        namespace_set = ['test.test', 'alpha.foo']
        doc_manager = DocManager()
        oplog = OplogThread(primary_conn, mongos, oplog_coll, True,
                            doc_manager, LockingDict(), namespace_set,
                            cls.AUTH_KEY, AUTH_USERNAME)

        return (oplog, primary_conn, oplog_coll, oplog.main_connection)
    def setUp(self):
        # Create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # Start a replica set
        _, self.secondary_p, self.primary_p = start_replica_set('rollbacks')
        # Connection to the replica set as a whole
        self.main_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p),
                                     replicaSet='rollbacks')
        # Connection to the primary specifically
        self.primary_conn = MongoClient('%s:%d' % (mongo_host, self.primary_p))
        # Connection to the secondary specifically
        self.secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.secondary_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman = OplogThread(
            primary_conn=self.main_conn,
            main_address='%s:%d' % (mongo_host, self.primary_p),
            oplog_coll=self.main_conn["local"]["oplog.rs"],
            is_sharded=False,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mc"],
            auth_key=None,
            auth_username=None,
            repl_set="rollbacks"
        )
    def setUp(self):
        # Create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # Start a replica set
        start_cluster(sharded=False, use_mongos=False)
        # Connection to the replica set as a whole
        self.main_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"],
                                    replicaSet="demo-repl")
        # Connection to the primary specifically
        self.primary_conn = Connection("localhost:%s" % PORTS_ONE["PRIMARY"])
        # Connection to the secondary specifically
        self.secondary_conn = Connection(
            "localhost:%s" % PORTS_ONE["SECONDARY"],
            read_preference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.main_conn["test"]["mc"].drop()

        # Oplog thread
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman = OplogThread(
            primary_conn=self.main_conn,
            main_address="localhost:%s" % PORTS_ONE["PRIMARY"],
            oplog_coll=self.main_conn["local"]["oplog.rs"],
            is_sharded=False,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mc"],
            auth_key=None,
            auth_username=None,
            repl_set="demo-repl")
Example #22
0
 def setUp(self):
     self.dest_mapping_stru = DestMapping([], [], {})
     self.opman = OplogThread(primary_client=self.primary_conn,
                              doc_managers=(DocManager(), ),
                              oplog_progress_dict=LockingDict(),
                              dest_mapping_stru=self.dest_mapping_stru)
Example #23
0
class Connector(threading.Thread):
    """Checks the cluster for shards to tail.
    """
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None):
        if doc_manager is not None:
            doc_manager = imp.load_source('DocManager', doc_manager)
        else:
            from mongo_connector.doc_manager import DocManager
        time.sleep(1)
        super(Connector, self).__init__()

        #can_run is set to false when we join the thread
        self.can_run = True

        #The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        #main address - either mongos for sharded setups or a primary otherwise
        self.address = address

        #The URL of the target system
        self.target_url = target_url

        #The set of relevant namespaces to consider
        self.ns_set = ns_set

        #The key that is a unique document identifier for the target system.
        #Not necessarily the mongo unique key.
        self.u_key = u_key

        #Password for authentication
        self.auth_key = auth_key

        #Username for authentication
        self.auth_username = auth_username

        #The set of OplogThreads created
        self.shard_set = {}

        #Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        try:
            if target_url is None:
                if doc_manager is None:  # imported using from... import
                    self.doc_manager = DocManager(unique_key=u_key)
                else:  # imported using load source
                    self.doc_manager = doc_manager.DocManager(unique_key=u_key)
            else:
                if doc_manager is None:
                    self.doc_manager = DocManager(self.target_url,
                                                  unique_key=u_key)
                else:
                    self.doc_manager = doc_manager.DocManager(self.target_url,
                                                              unique_key=u_key)
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" %
                                     str(e))
                    sys.exit(1)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK )):
                    logging.critical("Invalid permissions on %s! Exiting" %
                        (self.oplog_checkpoint))
                    sys.exit(1)

    def join(self):
        """ Joins thread, stops it from running
        """
        self.can_run = False
        self.doc_manager.stop()
        threading.Thread.join(self)

    def write_oplog_progress(self):
        """ Writes oplog progress to file provided by user
        """

        if self.oplog_checkpoint is None:
            return None

        # write to temp file
        backup_file = self.oplog_checkpoint + '.backup'
        os.rename(self.oplog_checkpoint, backup_file)

        # for each of the threads write to file
        with open(self.oplog_checkpoint, 'w') as dest:
            with self.oplog_progress as oplog_prog:

                oplog_dict = oplog_prog.get_dict()
                for oplog, time_stamp in oplog_dict.items():
                    oplog_str = str(oplog)
                    timestamp = util.bson_ts_to_long(time_stamp)
                    json_str = json.dumps([oplog_str, timestamp])
                    try:
                        dest.write(json_str)
                    except IOError:
                        # Basically wipe the file, copy from backup
                        dest.truncate()
                        with open(backup_file, 'r') as backup:
                            shutil.copyfile(backup, dest)
                        break

        os.remove(self.oplog_checkpoint + '.backup')

    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spanwed.
        """

        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                logging.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        source = open(self.oplog_checkpoint, 'r')
        try:
            data = json.load(source)
        except ValueError:       # empty file
            reason = "It may be empty or corrupt."
            logging.info("MongoConnector: Can't read oplog progress file. %s" %
                (reason))
            source.close()
            return None

        source.close()

        count = 0
        oplog_dict = self.oplog_progress.get_dict()
        for count in range(0, len(data), 2):
            oplog_str = data[count]
            time_stamp = data[count + 1]
            oplog_dict[oplog_str] = util.long_to_bson_ts(time_stamp)
            #stored as bson_ts

    def run(self):
        """Discovers the mongo cluster and creates a thread for each primary.
        """
        main_conn = Connection(self.address)
        if self.auth_key is not None:
            main_conn['admin'].authenticate(self.auth_username, self.auth_key)
        self.read_oplog_progress()
        conn_type = None

        try:
            main_conn.admin.command("isdbgrid")
        except pymongo.errors.OperationFailure:
            conn_type = "REPLSET"

        if conn_type == "REPLSET":
            #non sharded configuration
            oplog_coll = main_conn['local']['oplog.rs']

            prim_admin = main_conn.admin
            repl_set = prim_admin.command("replSetGetStatus")['set']

            oplog = oplog_manager.OplogThread(main_conn,
                (main_conn.host + ":" + str(main_conn.port)),
                oplog_coll,
                False, self.doc_manager,
                self.oplog_progress,
                self.ns_set, self.auth_key,
                self.auth_username,
                repl_set=repl_set)
            self.shard_set[0] = oplog
            logging.info('MongoConnector: Starting connection thread %s' %
                         main_conn)
            oplog.start()

            while self.can_run:
                if not self.shard_set[0].running:
                    logging.error("MongoConnector: OplogThread"
                        " %s unexpectedly stopped! Shutting down" %
                        (str(self.shard_set[0])))
                    self.oplog_thread_join()
                    self.doc_manager.stop()
                    return

                self.write_oplog_progress()
                time.sleep(1)

        else:       # sharded cluster
            while self.can_run is True:

                for shard_doc in main_conn['config']['shards'].find():
                    shard_id = shard_doc['_id']
                    if shard_id in self.shard_set:
                        if not self.shard_set[shard_id].running:
                            logging.error("MongoConnector: OplogThread"
                                " %s unexpectedly stopped! Shutting down" %
                                (str(self.shard_set[shard_id])))
                            self.oplog_thread_join()
                            self.doc_manager.stop()
                            return

                        self.write_oplog_progress()
                        time.sleep(1)
                        continue
                    try:
                        repl_set, hosts = shard_doc['host'].split('/')
                    except ValueError:
                        cause = "The system only uses replica sets!"
                        logging.error("MongoConnector: %s", cause)
                        self.oplog_thread_join()
                        self.doc_manager.stop()
                        return

                    shard_conn = Connection(hosts, replicaset=repl_set)
                    oplog_coll = shard_conn['local']['oplog.rs']
                    oplog = oplog_manager.OplogThread(shard_conn, self.address,
                                                      oplog_coll, True,
                                                      self.doc_manager,
                                                      self.oplog_progress,
                                                      self.ns_set,
                                                      self.auth_key,
                                                      self.auth_username)
                    self.shard_set[shard_id] = oplog
                    msg = "Starting connection thread"
                    logging.info("MongoConnector: %s %s" % (msg, shard_conn))
                    oplog.start()

        self.oplog_thread_join()
        self.write_oplog_progress()

    def oplog_thread_join(self):
        """Stops all the OplogThreads
        """
        logging.info('MongoConnector: Stopping all OplogThreads')
        for thread in self.shard_set.values():
            thread.join()
Example #24
0
    def test_exclude_fields_constructor(self):
        # Test with the "_id" field in exclude_fields
        exclude_fields = ["_id", "title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        exclude_fields.remove('_id')
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in ['extra1', 'extra2']), filtered)

        # Test without "_id" field included in exclude_fields
        exclude_fields = ["title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], exclude_fields,
                           dict((f, 0) for f in exclude_fields))
        extra_fields = extra_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'extra1': 1, 'extra2': 1}, filtered)

        # Test with only "_id" field in exclude_fields
        exclude_fields = ["_id"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=exclude_fields)
        self._check_fields(opman, [], [], None)
        extra_fields = exclude_fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)

        # Test with nothing set for exclude_fields
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            exclude_fields=None)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
Example #25
0
    def test_fields_constructor(self):
        # Test with "_id" field in constructor
        fields = ["_id", "title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test without "_id" field in constructor
        fields = ["title", "content", "author"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        fields.append('_id')
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in fields), filtered)

        # Test with only "_id" field
        fields = ["_id"]
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru,
                            fields=fields)
        self._check_fields(opman, fields, [], dict((f, 1) for f in fields))
        extra_fields = fields + ['extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual({'_id': 1}, filtered)

        # Test with no fields set
        opman = OplogThread(primary_client=self.primary_conn,
                            doc_managers=(DocManager(), ),
                            oplog_progress_dict=LockingDict(),
                            dest_mapping_stru=self.dest_mapping_stru)
        self._check_fields(opman, [], [], None)
        extra_fields = ['_id', 'extra1', 'extra2']
        filtered = opman.filter_oplog_entry({
            'op':
            'i',
            'o':
            dict((f, 1) for f in extra_fields)
        })['o']
        self.assertEqual(dict((f, 1) for f in extra_fields), filtered)
Example #26
0
 def setUp(self):
     self.repl_set = ReplicaSet().start()
     self.primary_conn = self.repl_set.client()
     self.oplog_progress = LockingDict()
     self.opman = None
Example #27
0
    def __init__(self, mongo_address, doc_managers=None, **kwargs):
        super(Connector, self).__init__()

        # can_run is set to false when we join the thread
        self.can_run = True

        # The signal that caused the connector to stop or None
        self.signal = None

        # main address - either mongos for sharded setups or a primary otherwise
        self.address = mongo_address

        # connection to the main address
        self.main_conn = None

        # List of DocManager instances
        if doc_managers:
            self.doc_managers = doc_managers
        else:
            LOG.warning('No doc managers specified, using simulator.')
            self.doc_managers = (simulator.DocManager(),)

        # Password for authentication
        self.auth_key = kwargs.pop('auth_key', None)

        # Username for authentication
        self.auth_username = kwargs.pop('auth_username', None)

        # The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = kwargs.pop('oplog_checkpoint',
                                           'oplog.timestamp')

        # The set of OplogThreads created
        self.shard_set = {}

        # Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        # Timezone awareness
        self.tz_aware = kwargs.get('tz_aware', False)

        # SSL keyword arguments to MongoClient.
        ssl_certfile = kwargs.pop('ssl_certfile', None)
        ssl_ca_certs = kwargs.pop('ssl_ca_certs', None)
        ssl_keyfile = kwargs.pop('ssl_keyfile', None)
        ssl_cert_reqs = kwargs.pop('ssl_cert_reqs', None)
        self.ssl_kwargs = {}
        if ssl_certfile:
            self.ssl_kwargs['ssl_certfile'] = ssl_certfile
        if ssl_ca_certs:
            self.ssl_kwargs['ssl_ca_certs'] = ssl_ca_certs
        if ssl_keyfile:
            self.ssl_kwargs['ssl_keyfile'] = ssl_keyfile
        if ssl_cert_reqs:
            self.ssl_kwargs['ssl_cert_reqs'] = ssl_cert_reqs

        # Save the rest of kwargs.
        self.kwargs = kwargs

        # Replace the origin dest_mapping
        self.dest_mapping = DestMapping(kwargs.get('ns_set', []),
                                        kwargs.get('ex_ns_set', []),
                                        kwargs.get('dest_mapping', {}))

        # Initialize and set the command helper
        command_helper = CommandHelper(self.dest_mapping)
        for dm in self.doc_managers:
            dm.command_helper = command_helper

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                LOG.warning(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    LOG.critical("MongoConnector: Could not "
                                 "create a progress log: %s" %
                                 str(e))
                    sys.exit(2)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK)):
                    LOG.critical("Invalid permissions on %s! Exiting" %
                                 (self.oplog_checkpoint))
                    sys.exit(2)
Example #28
0
    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman1.init_cursor()[0]))
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman2.init_cursor()[0]))
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = self.opman2.collection_dump = True

        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, None)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, no collection dump, stuff in oplog
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.collection_dump = self.opman2.collection_dump = False
        collection.insert({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.opman1.init_cursor()
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        cursor, cursor_len = self.opman2.init_cursor()
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 1200)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)], last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry1 = list(self.shard1_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=2))
        entry2 = list(self.shard2_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1, cursor_len1 = self.opman1.init_cursor()
        cursor2, cursor_len2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
        progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman1.checkpoint)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman2.checkpoint)
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None,
                 collection_dump=True, batch_size=constants.DEFAULT_BATCH_SIZE,
                 fields=None, dest_mapping={},
                 auto_commit_interval=constants.DEFAULT_COMMIT_INTERVAL,
                 continue_on_error=False):

        if target_url and not doc_manager:
            raise errors.ConnectorError("Cannot create a Connector with a "
                                        "target URL but no doc manager!")

        def is_string(s):
            try:
                return isinstance(s, basestring)
            except NameError:
                return isinstance(s, str)

        def load_doc_manager(path):
            name, _ = os.path.splitext(os.path.basename(path))
            try:
                import importlib.machinery
                loader = importlib.machinery.SourceFileLoader(name, path)
                module = loader.load_module(name)
            except ImportError:
                module = imp.load_source(name, path)
            return module

        doc_manager_modules = None

        if doc_manager is not None:
            # backwards compatilibity: doc_manager may be a string
            if is_string(doc_manager):
                doc_manager_modules = [load_doc_manager(doc_manager)]
            # doc_manager is a list
            else:
                doc_manager_modules = []
                for dm in doc_manager:
                    doc_manager_modules.append(load_doc_manager(dm))

        super(Connector, self).__init__()

        #can_run is set to false when we join the thread
        self.can_run = True

        #The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        #main address - either mongos for sharded setups or a primary otherwise
        self.address = address

        #The URLs of each target system, respectively
        if is_string(target_url):
            self.target_urls = [target_url]
        elif target_url:
            self.target_urls = list(target_url)
        else:
            self.target_urls = None

        #The set of relevant namespaces to consider
        self.ns_set = ns_set

        #The dict of source namespace to destination namespace
        self.dest_mapping = dest_mapping

        #Whether the collection dump gracefully handles exceptions
        self.continue_on_error = continue_on_error

        #The key that is a unique document identifier for the target system.
        #Not necessarily the mongo unique key.
        self.u_key = u_key

        #Password for authentication
        self.auth_key = auth_key

        #Username for authentication
        self.auth_username = auth_username

        #The set of OplogThreads created
        self.shard_set = {}

        #Boolean chooses whether to dump the entire collection if no timestamp
        # is present in the config file
        self.collection_dump = collection_dump

        #Num entries to process before updating config file with current pos
        self.batch_size = batch_size

        #Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        # List of fields to export
        self.fields = fields

        try:
            docman_kwargs = {"unique_key": u_key,
                             "namespace_set": ns_set,
                             "auto_commit_interval": auto_commit_interval}

            # No doc managers specified, using simulator
            if doc_manager is None:
                self.doc_managers = [simulator.DocManager(**docman_kwargs)]
            else:
                self.doc_managers = []
                for i, d in enumerate(doc_manager_modules):
                    # self.target_urls may be shorter than
                    # self.doc_managers, or left as None
                    if self.target_urls and i < len(self.target_urls):
                        target_url = self.target_urls[i]
                    else:
                        target_url = None

                    if target_url:
                        self.doc_managers.append(
                            d.DocManager(self.target_urls[i],
                                         **docman_kwargs))
                    else:
                        self.doc_managers.append(
                            d.DocManager(**docman_kwargs))
                # If more target URLs were given than doc managers, may need
                # to create additional doc managers
                for url in self.target_urls[i + 1:]:
                    self.doc_managers.append(
                        doc_manager_modules[-1].DocManager(url,
                                                           **docman_kwargs))
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" %
                                     str(e))
                    sys.exit(2)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK)):
                    logging.critical("Invalid permissions on %s! Exiting" %
                                     (self.oplog_checkpoint))
                    sys.exit(2)
    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. last checkpoint exists
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertEqual(self.opman1.init_cursor(), None)
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertEqual(self.opman2.init_cursor(), None)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = True
        self.opman2.collection_dump = True
        self.assertEqual(self.opman1.init_cursor(), None)
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertEqual(self.opman2.init_cursor(), None)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor = self.opman2.init_cursor()
        self.assertEqual(next(cursor)["ts"], oplog_startup_ts)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, non-empty collections, stuff in oplog
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        collection.insert({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.assertEqual(next(self.opman1.init_cursor())["ts"], last_ts1)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        self.assertEqual(next(self.opman2.init_cursor())["ts"], last_ts2)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)], last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry1 = list(
            self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        entry2 = list(
            self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1 = self.opman1.init_cursor()
        cursor2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])
    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman1.oplog = self.shard1_conn["test"]["emptycollection"]
        self.opman2.oplog = self.shard2_conn["test"]["emptycollection"]
        self.opman1.collection_dump = False
        self.opman2.collection_dump = False
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman1.init_cursor()[0]))
        self.assertEqual(self.opman1.checkpoint, None)
        self.assertTrue(all(doc['op'] == 'n'
                            for doc in self.opman2.init_cursor()[0]))
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman1.collection_dump = self.opman2.collection_dump = True

        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, None)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman1.oplog = self.shard1_conn["local"]["oplog.rs"]
        self.opman2.oplog = self.shard2_conn["local"]["oplog.rs"]
        oplog_startup_ts = self.opman2.get_last_oplog_timestamp()
        collection = self.mongos_conn["test"]["mcsharded"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts1 = self.opman1.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        # init_cursor should point to startup message in shard2 oplog
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman2.checkpoint, oplog_startup_ts)

        # No last checkpoint, no collection dump, stuff in oplog
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.collection_dump = self.opman2.collection_dump = False
        collection.insert_one({"i": 1200})
        last_ts2 = self.opman2.get_last_oplog_timestamp()
        self.opman1.init_cursor()
        self.assertEqual(self.opman1.checkpoint, last_ts1)
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)], last_ts1)
        cursor, cursor_len = self.opman2.init_cursor()
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 1200)
        self.assertEqual(self.opman2.checkpoint, last_ts2)
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)], last_ts2)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        entry1 = list(
            self.shard1_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        entry2 = list(
            self.shard2_conn["local"]["oplog.rs"].find(skip=200, limit=-2))
        progress.get_dict()[str(self.opman1.oplog)] = entry1[0]["ts"]
        progress.get_dict()[str(self.opman2.oplog)] = entry2[0]["ts"]
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor1, cursor_len1 = self.opman1.init_cursor()
        cursor2, cursor_len2 = self.opman2.init_cursor()
        self.assertEqual(entry1[1]["ts"], next(cursor1)["ts"])
        self.assertEqual(entry2[1]["ts"], next(cursor2)["ts"])
        self.assertEqual(self.opman1.checkpoint, entry1[0]["ts"])
        self.assertEqual(self.opman2.checkpoint, entry2[0]["ts"])
        with self.opman1.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman1.oplog)],
                             entry1[0]["ts"])
        with self.opman2.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman2.oplog)],
                             entry2[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman1.oplog)] = bson.Timestamp(1, 0)
        progress.get_dict()[str(self.opman2.oplog)] = bson.Timestamp(1, 0)
        self.opman1.oplog_progress = self.opman2.oplog_progress = progress
        self.opman1.checkpoint = self.opman2.checkpoint = None
        cursor, cursor_len = self.opman1.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman1.checkpoint)
        cursor, cursor_len = self.opman2.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman2.checkpoint)
    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(all(doc["op"] == "n" for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn["local"]["oplog.rs"]
        collection = self.primary_conn["test"]["test"]
        collection.insert({"i": 1})
        collection.remove({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)["o"]["i"], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert({"i": i + 500})
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200, limit=2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], entry[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman.checkpoint)
Example #33
0
    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        self.reset_opman(["includedb1.*", "includedb2.includecol1"], [], {})

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["includedb1"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertTrue(cursor_empty)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["includedb1"]["includecol1"]
        collection.insert_one({"idb1col1": 1})
        collection.delete_one({"idb1col1": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertFalse(cursor_empty)
        self.assertEqual(self.opman.checkpoint, last_ts)
        self.assertEqual(self.opman.read_last_checkpoint(), last_ts)

        # No last checkpoint, no collection dump, something in oplog
        # If collection dump is false the checkpoint should not be set
        self.opman.checkpoint = None
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"idb1col1": 2})
        cursor, cursor_empty = self.opman.init_cursor()
        for doc in cursor:
            last_doc = doc
        self.assertEqual(last_doc['o']['idb1col1'], 2)
        self.assertIsNone(self.opman.checkpoint)

        # Last checkpoint exists
        collection.insert_many([{"idb1col1": i + 500} for i in range(1000)])
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=-2))
        self.opman.update_checkpoint(entry[0]["ts"])
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        self.assertEqual(self.opman.read_last_checkpoint(), entry[0]["ts"])

        # Last checkpoint is behind
        self.opman.update_checkpoint(bson.Timestamp(1, 0))
        cursor, cursor_empty = self.opman.init_cursor()
        self.assertTrue(cursor_empty)
        self.assertEqual(cursor, None)
        self.assertEqual(self.opman.checkpoint, bson.Timestamp(1, 0))
Example #34
0
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None):
        if doc_manager is not None:
            doc_manager = imp.load_source('DocManager', doc_manager)
        else:
            from mongo_connector.doc_manager import DocManager
        time.sleep(1)
        super(Connector, self).__init__()

        #can_run is set to false when we join the thread
        self.can_run = True

        #The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        #main address - either mongos for sharded setups or a primary otherwise
        self.address = address

        #The URL of the target system
        self.target_url = target_url

        #The set of relevant namespaces to consider
        self.ns_set = ns_set

        #The key that is a unique document identifier for the target system.
        #Not necessarily the mongo unique key.
        self.u_key = u_key

        #Password for authentication
        self.auth_key = auth_key

        #Username for authentication
        self.auth_username = auth_username

        #The set of OplogThreads created
        self.shard_set = {}

        #Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        try:
            if target_url is None:
                if doc_manager is None:  # imported using from... import
                    self.doc_manager = DocManager(unique_key=u_key)
                else:  # imported using load source
                    self.doc_manager = doc_manager.DocManager(unique_key=u_key)
            else:
                if doc_manager is None:
                    self.doc_manager = DocManager(self.target_url,
                                                  unique_key=u_key)
                else:
                    self.doc_manager = doc_manager.DocManager(self.target_url,
                                                              unique_key=u_key)
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" %
                                     str(e))
                    sys.exit(1)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK )):
                    logging.critical("Invalid permissions on %s! Exiting" %
                        (self.oplog_checkpoint))
                    sys.exit(1)
    def test_init_cursor(self):
        """Test the init_cursor method

        Cases:

        1. no last checkpoint, no collection dump
        2. no last checkpoint, collection dump ok and stuff to dump
        3. no last checkpoint, nothing to dump, stuff in oplog
        4. no last checkpoint, nothing to dump, nothing in oplog
        5. no last checkpoint, no collection dump, stuff in oplog
        6. last checkpoint exists
        7. last checkpoint is behind
        """

        # N.B. these sub-cases build off of each other and cannot be re-ordered
        # without side-effects

        # No last checkpoint, no collection dump, nothing in oplog
        # "change oplog collection" to put nothing in oplog
        self.opman.oplog = self.primary_conn["test"]["emptycollection"]
        self.opman.collection_dump = False
        self.assertTrue(
            all(doc['op'] == 'n' for doc in self.opman.init_cursor()[0]))
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, nothing in oplog
        self.opman.collection_dump = True
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor, None)
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, None)

        # No last checkpoint, empty collections, something in oplog
        self.opman.oplog = self.primary_conn['local']['oplog.rs']
        collection = self.primary_conn["test"]["test"]
        collection.insert_one({"i": 1})
        collection.delete_one({"i": 1})
        time.sleep(3)
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(self.opman.checkpoint, last_ts)
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)], last_ts)

        # No last checkpoint, no collection dump, something in oplog
        self.opman.oplog_progress = LockingDict()
        self.opman.collection_dump = False
        collection.insert_one({"i": 2})
        last_ts = self.opman.get_last_oplog_timestamp()
        cursor, cursor_len = self.opman.init_cursor()
        for i in range(cursor_len - 1):
            next(cursor)
        self.assertEqual(next(cursor)['o']['i'], 2)
        self.assertEqual(self.opman.checkpoint, last_ts)

        # Last checkpoint exists
        progress = LockingDict()
        self.opman.oplog_progress = progress
        for i in range(1000):
            collection.insert_one({"i": i + 500})
        entry = list(self.primary_conn["local"]["oplog.rs"].find(skip=200,
                                                                 limit=-2))
        progress.get_dict()[str(self.opman.oplog)] = entry[0]["ts"]
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(next(cursor)["ts"], entry[1]["ts"])
        self.assertEqual(self.opman.checkpoint, entry[0]["ts"])
        with self.opman.oplog_progress as prog:
            self.assertEqual(prog.get_dict()[str(self.opman.oplog)],
                             entry[0]["ts"])

        # Last checkpoint is behind
        progress = LockingDict()
        progress.get_dict()[str(self.opman.oplog)] = bson.Timestamp(1, 0)
        self.opman.oplog_progress = progress
        self.opman.checkpoint = None
        cursor, cursor_len = self.opman.init_cursor()
        self.assertEqual(cursor_len, 0)
        self.assertEqual(cursor, None)
        self.assertIsNotNone(self.opman.checkpoint)
Example #36
0
    def setUp(self):
        """ Initialize the cluster:

        Clean out the databases used by the tests
        Make connections to mongos, mongods
        Create and shard test collections
        Create OplogThreads
        """
        self.cluster = ShardedCluster().start()

        # Connection to mongos
        self.mongos_conn = self.cluster.client()

        # Connections to the shards
        self.shard1_conn = self.cluster.shards[0].client()
        self.shard2_conn = self.cluster.shards[1].client()
        self.shard1_secondary_conn = self.cluster.shards[0].secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)
        self.shard2_secondary_conn = self.cluster.shards[1].secondary.client(
            readPreference=ReadPreference.SECONDARY_PREFERRED)

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        self.mongos_conn.admin.command(
            bson.SON([("split", "test.mcsharded"), ("middle", {
                "i": 1000
            })]))

        # disable the balancer
        self.mongos_conn.config.settings.update({"_id": "balancer"},
                                                {"$set": {
                                                    "stopped": True
                                                }},
                                                upsert=True)

        # Move chunks to their proper places
        try:
            self.mongos_conn["admin"].command("moveChunk",
                                              "test.mcsharded",
                                              find={"i": 1},
                                              to='demo-set-0')
        except pymongo.errors.OperationFailure:
            pass
        try:
            self.mongos_conn["admin"].command("moveChunk",
                                              "test.mcsharded",
                                              find={"i": 1000},
                                              to='demo-set-1')
        except pymongo.errors.OperationFailure:
            pass

        # Make sure chunks are distributed correctly
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000

        assert_soon(chunks_moved,
                    max_tries=120,
                    message='chunks not moved? doc1=%r, doc2=%r' %
                    (self.shard1_conn.test.mcsharded.find_one(),
                     self.shard2_conn.test.mcsharded.find_one()))
        self.mongos_conn.test.mcsharded.remove()

        # create a new oplog progress file
        try:
            os.unlink("oplog.timestamp")
        except OSError:
            pass
        open("oplog.timestamp", "w").close()

        # Oplog threads (oplog manager) for each shard
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_client=self.shard1_conn,
            doc_managers=(doc_manager, ),
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)
        self.opman2 = OplogThread(
            primary_client=self.shard2_conn,
            doc_managers=(doc_manager, ),
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            mongos_client=self.mongos_conn)
    def setUp(self):
        """ Initialize the cluster:

        Clean out the databases used by the tests
        Make connections to mongos, mongods
        Create and shard test collections
        Create OplogThreads
        """
        # Start the cluster with a mongos on port 27217
        self.mongos_p = start_cluster()

        # Connection to mongos
        mongos_address = '%s:%d' % (mongo_host, self.mongos_p)
        self.mongos_conn = MongoClient(mongos_address)

        # Connections to the shards
        shard1_ports = get_shard(self.mongos_p, 0)
        shard2_ports = get_shard(self.mongos_p, 1)
        self.shard1_prim_p = shard1_ports['primary']
        self.shard1_scnd_p = shard1_ports['secondaries'][0]
        self.shard2_prim_p = shard2_ports['primary']
        self.shard2_scnd_p = shard2_ports['secondaries'][0]
        self.shard1_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard1_prim_p),
                                       replicaSet="demo-set-0")
        self.shard2_conn = MongoClient('%s:%d'
                                       % (mongo_host, self.shard2_prim_p),
                                       replicaSet="demo-set-1")
        self.shard1_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard1_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )
        self.shard2_secondary_conn = MongoClient(
            '%s:%d' % (mongo_host, self.shard2_scnd_p),
            read_preference=ReadPreference.SECONDARY_PREFERRED
        )

        # Wipe any test data
        self.mongos_conn["test"]["mcsharded"].drop()

        # Create and shard the collection test.mcsharded on the "i" field
        self.mongos_conn["test"]["mcsharded"].ensure_index("i")
        self.mongos_conn.admin.command("enableSharding", "test")
        self.mongos_conn.admin.command("shardCollection",
                                       "test.mcsharded",
                                       key={"i": 1})

        # Pre-split the collection so that:
        # i < 1000            lives on shard1
        # i >= 1000           lives on shard2
        self.mongos_conn.admin.command(bson.SON([
            ("split", "test.mcsharded"),
            ("middle", {"i": 1000})
        ]))

        # disable the balancer
        self.mongos_conn.config.settings.update(
            {"_id": "balancer"},
            {"$set": {"stopped": True}},
            upsert=True
        )

        # Move chunks to their proper places
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1},
                to="demo-set-0"
            )
        except pymongo.errors.OperationFailure:
            pass        # chunk may already be on the correct shard
        try:
            self.mongos_conn["admin"].command(
                "moveChunk",
                "test.mcsharded",
                find={"i": 1000},
                to="demo-set-1"
            )
        except pymongo.errors.OperationFailure:
            pass        # chunk may already be on the correct shard

        # Make sure chunks are distributed correctly
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1})
        self.mongos_conn["test"]["mcsharded"].insert({"i": 1000})

        def chunks_moved():
            doc1 = self.shard1_conn.test.mcsharded.find_one()
            doc2 = self.shard2_conn.test.mcsharded.find_one()
            if None in (doc1, doc2):
                return False
            return doc1['i'] == 1 and doc2['i'] == 1000
        assert_soon(chunks_moved)
        self.mongos_conn.test.mcsharded.remove()

        # create a new oplog progress file
        try:
            os.unlink("config.txt")
        except OSError:
            pass
        open("config.txt", "w").close()

        # Oplog threads (oplog manager) for each shard
        doc_manager = DocManager()
        oplog_progress = LockingDict()
        self.opman1 = OplogThread(
            primary_conn=self.shard1_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard1_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )
        self.opman2 = OplogThread(
            primary_conn=self.shard2_conn,
            main_address='%s:%d' % (mongo_host, self.mongos_p),
            oplog_coll=self.shard2_conn["local"]["oplog.rs"],
            is_sharded=True,
            doc_manager=doc_manager,
            oplog_progress_dict=oplog_progress,
            namespace_set=["test.mcsharded", "test.mcunsharded"],
            auth_key=None,
            auth_username=None
        )
class Connector(threading.Thread):
    """Checks the cluster for shards to tail.
    """
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None,
                 collection_dump=True, batch_size=constants.DEFAULT_BATCH_SIZE,
                 fields=None, dest_mapping={},
                 auto_commit_interval=constants.DEFAULT_COMMIT_INTERVAL,
                 continue_on_error=False):

        if target_url and not doc_manager:
            raise errors.ConnectorError("Cannot create a Connector with a "
                                        "target URL but no doc manager!")

        def is_string(s):
            try:
                return isinstance(s, basestring)
            except NameError:
                return isinstance(s, str)

        def load_doc_manager(path):
            name, _ = os.path.splitext(os.path.basename(path))
            try:
                import importlib.machinery
                loader = importlib.machinery.SourceFileLoader(name, path)
                module = loader.load_module(name)
            except ImportError:
                module = imp.load_source(name, path)
            return module

        doc_manager_modules = None

        if doc_manager is not None:
            # backwards compatilibity: doc_manager may be a string
            if is_string(doc_manager):
                doc_manager_modules = [load_doc_manager(doc_manager)]
            # doc_manager is a list
            else:
                doc_manager_modules = []
                for dm in doc_manager:
                    doc_manager_modules.append(load_doc_manager(dm))

        super(Connector, self).__init__()

        #can_run is set to false when we join the thread
        self.can_run = True

        #The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        #main address - either mongos for sharded setups or a primary otherwise
        self.address = address

        #The URLs of each target system, respectively
        if is_string(target_url):
            self.target_urls = [target_url]
        elif target_url:
            self.target_urls = list(target_url)
        else:
            self.target_urls = None

        #The set of relevant namespaces to consider
        self.ns_set = ns_set

        #The dict of source namespace to destination namespace
        self.dest_mapping = dest_mapping

        #Whether the collection dump gracefully handles exceptions
        self.continue_on_error = continue_on_error

        #The key that is a unique document identifier for the target system.
        #Not necessarily the mongo unique key.
        self.u_key = u_key

        #Password for authentication
        self.auth_key = auth_key

        #Username for authentication
        self.auth_username = auth_username

        #The set of OplogThreads created
        self.shard_set = {}

        #Boolean chooses whether to dump the entire collection if no timestamp
        # is present in the config file
        self.collection_dump = collection_dump

        #Num entries to process before updating config file with current pos
        self.batch_size = batch_size

        #Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        # List of fields to export
        self.fields = fields

        try:
            docman_kwargs = {"unique_key": u_key,
                             "namespace_set": ns_set,
                             "auto_commit_interval": auto_commit_interval}

            # No doc managers specified, using simulator
            if doc_manager is None:
                self.doc_managers = [simulator.DocManager(**docman_kwargs)]
            else:
                self.doc_managers = []
                for i, d in enumerate(doc_manager_modules):
                    # self.target_urls may be shorter than
                    # self.doc_managers, or left as None
                    if self.target_urls and i < len(self.target_urls):
                        target_url = self.target_urls[i]
                    else:
                        target_url = None

                    if target_url:
                        self.doc_managers.append(
                            d.DocManager(self.target_urls[i],
                                         **docman_kwargs))
                    else:
                        self.doc_managers.append(
                            d.DocManager(**docman_kwargs))
                # If more target URLs were given than doc managers, may need
                # to create additional doc managers
                for url in self.target_urls[i + 1:]:
                    self.doc_managers.append(
                        doc_manager_modules[-1].DocManager(url,
                                                           **docman_kwargs))
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" %
                                     str(e))
                    sys.exit(2)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK)):
                    logging.critical("Invalid permissions on %s! Exiting" %
                                     (self.oplog_checkpoint))
                    sys.exit(2)

    def join(self):
        """ Joins thread, stops it from running
        """
        self.can_run = False
        for dm in self.doc_managers:
            dm.stop()
        threading.Thread.join(self)

    def write_oplog_progress(self):
        """ Writes oplog progress to file provided by user
        """

        if self.oplog_checkpoint is None:
            return None

        # write to temp file
        backup_file = self.oplog_checkpoint + '.backup'
        os.rename(self.oplog_checkpoint, backup_file)

        # for each of the threads write to file
        with open(self.oplog_checkpoint, 'w') as dest:
            with self.oplog_progress as oplog_prog:

                oplog_dict = oplog_prog.get_dict()
                for oplog, time_stamp in oplog_dict.items():
                    oplog_str = str(oplog)
                    timestamp = util.bson_ts_to_long(time_stamp)
                    json_str = json.dumps([oplog_str, timestamp])
                    try:
                        dest.write(json_str)
                    except IOError:
                        # Basically wipe the file, copy from backup
                        dest.truncate()
                        with open(backup_file, 'r') as backup:
                            shutil.copyfile(backup, dest)
                        break

        os.remove(self.oplog_checkpoint + '.backup')

    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spanwed.
        """

        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                logging.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        source = open(self.oplog_checkpoint, 'r')
        try:
            data = json.load(source)
        except ValueError:       # empty file
            reason = "It may be empty or corrupt."
            logging.info("MongoConnector: Can't read oplog progress file. %s" %
                         (reason))
            source.close()
            return None

        source.close()

        count = 0
        oplog_dict = self.oplog_progress.get_dict()
        for count in range(0, len(data), 2):
            oplog_str = data[count]
            time_stamp = data[count + 1]
            oplog_dict[oplog_str] = util.long_to_bson_ts(time_stamp)
            #stored as bson_ts

    def run(self):
        """Discovers the mongo cluster and creates a thread for each primary.
        """
        main_conn = MongoClient(self.address)
        if self.auth_key is not None:
            main_conn['admin'].authenticate(self.auth_username, self.auth_key)
        self.read_oplog_progress()
        conn_type = None

        try:
            main_conn.admin.command("isdbgrid")
        except pymongo.errors.OperationFailure:
            conn_type = "REPLSET"

        if conn_type == "REPLSET":
            # Make sure we are connected to a replica set
            is_master = main_conn.admin.command("isMaster")
            if not "setName" in is_master:
                logging.error(
                    'No replica set at "%s"! A replica set is required '
                    'to run mongo-connector. Shutting down...' % self.address
                )
                return

            # Establish a connection to the replica set as a whole
            main_conn.disconnect()
            main_conn = MongoClient(self.address,
                                    replicaSet=is_master['setName'])
            if self.auth_key is not None:
                main_conn.admin.authenticate(self.auth_username, self.auth_key)

            #non sharded configuration
            oplog_coll = main_conn['local']['oplog.rs']

            oplog = OplogThread(
                primary_conn=main_conn,
                main_address=self.address,
                oplog_coll=oplog_coll,
                is_sharded=False,
                doc_manager=self.doc_managers,
                oplog_progress_dict=self.oplog_progress,
                namespace_set=self.ns_set,
                auth_key=self.auth_key,
                auth_username=self.auth_username,
                repl_set=is_master['setName'],
                collection_dump=self.collection_dump,
                batch_size=self.batch_size,
                fields=self.fields,
                dest_mapping=self.dest_mapping,
                continue_on_error=self.continue_on_error
            )
            self.shard_set[0] = oplog
            logging.info('MongoConnector: Starting connection thread %s' %
                         main_conn)
            oplog.start()

            while self.can_run:
                if not self.shard_set[0].running:
                    logging.error("MongoConnector: OplogThread"
                                  " %s unexpectedly stopped! Shutting down" %
                                  (str(self.shard_set[0])))
                    self.oplog_thread_join()
                    for dm in self.doc_managers:
                        dm.stop()
                    return

                self.write_oplog_progress()
                time.sleep(1)

        else:       # sharded cluster
            while self.can_run is True:

                for shard_doc in main_conn['config']['shards'].find():
                    shard_id = shard_doc['_id']
                    if shard_id in self.shard_set:
                        if not self.shard_set[shard_id].running:
                            logging.error("MongoConnector: OplogThread "
                                          "%s unexpectedly stopped! Shutting "
                                          "down" %
                                          (str(self.shard_set[shard_id])))
                            self.oplog_thread_join()
                            for dm in self.doc_managers:
                                dm.stop()
                            return

                        self.write_oplog_progress()
                        time.sleep(1)
                        continue
                    try:
                        repl_set, hosts = shard_doc['host'].split('/')
                    except ValueError:
                        cause = "The system only uses replica sets!"
                        logging.error("MongoConnector: %s", cause)
                        self.oplog_thread_join()
                        for dm in self.doc_managers:
                            dm.stop()
                        return

                    shard_conn = MongoClient(hosts, replicaSet=repl_set)
                    oplog_coll = shard_conn['local']['oplog.rs']

                    oplog = OplogThread(
                        primary_conn=shard_conn,
                        main_address=self.address,
                        oplog_coll=oplog_coll,
                        is_sharded=True,
                        doc_manager=self.doc_managers,
                        oplog_progress_dict=self.oplog_progress,
                        namespace_set=self.ns_set,
                        auth_key=self.auth_key,
                        auth_username=self.auth_username,
                        collection_dump=self.collection_dump,
                        batch_size=self.batch_size,
                        fields=self.fields,
                        dest_mapping=self.dest_mapping,
                        continue_on_error=self.continue_on_error
                    )
                    self.shard_set[shard_id] = oplog
                    msg = "Starting connection thread"
                    logging.info("MongoConnector: %s %s" % (msg, shard_conn))
                    oplog.start()

        self.oplog_thread_join()
        self.write_oplog_progress()

    def oplog_thread_join(self):
        """Stops all the OplogThreads
        """
        logging.info('MongoConnector: Stopping all OplogThreads')
        for thread in self.shard_set.values():
            thread.join()