Example #1
0
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None):
        if doc_manager is not None:
            doc_manager = imp.load_source('DocManager', doc_manager)
        else:
            from mongo_connector.doc_manager import DocManager
        time.sleep(1)
        super(Connector, self).__init__()

        #can_run is set to false when we join the thread
        self.can_run = True

        #The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        #main address - either mongos for sharded setups or a primary otherwise
        self.address = address

        #The URL of the target system
        self.target_url = target_url

        #The set of relevant namespaces to consider
        self.ns_set = ns_set

        #The key that is a unique document identifier for the target system.
        #Not necessarily the mongo unique key.
        self.u_key = u_key

        #Password for authentication
        self.auth_key = auth_key

        #Username for authentication
        self.auth_username = auth_username

        #The set of OplogThreads created
        self.shard_set = {}

        #Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        try:
            if target_url is None:
                if doc_manager is None:  # imported using from... import
                    self.doc_manager = DocManager(unique_key=u_key)
                else:  # imported using load source
                    self.doc_manager = doc_manager.DocManager(unique_key=u_key)
            else:
                if doc_manager is None:
                    self.doc_manager = DocManager(self.target_url,
                                                  unique_key=u_key)
                else:
                    self.doc_manager = doc_manager.DocManager(self.target_url,
                                                              unique_key=u_key)
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" %
                                     str(e))
                    sys.exit(1)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK )):
                    logging.critical("Invalid permissions on %s! Exiting" %
                        (self.oplog_checkpoint))
                    sys.exit(1)
Example #2
0
class Connector(threading.Thread):
    """Checks the cluster for shards to tail.
    """
    def __init__(self, address, oplog_checkpoint, target_url, ns_set,
                 u_key, auth_key, doc_manager=None, auth_username=None):
        if doc_manager is not None:
            doc_manager = imp.load_source('DocManager', doc_manager)
        else:
            from mongo_connector.doc_manager import DocManager
        time.sleep(1)
        super(Connector, self).__init__()

        #can_run is set to false when we join the thread
        self.can_run = True

        #The name of the file that stores the progress of the OplogThreads
        self.oplog_checkpoint = oplog_checkpoint

        #main address - either mongos for sharded setups or a primary otherwise
        self.address = address

        #The URL of the target system
        self.target_url = target_url

        #The set of relevant namespaces to consider
        self.ns_set = ns_set

        #The key that is a unique document identifier for the target system.
        #Not necessarily the mongo unique key.
        self.u_key = u_key

        #Password for authentication
        self.auth_key = auth_key

        #Username for authentication
        self.auth_username = auth_username

        #The set of OplogThreads created
        self.shard_set = {}

        #Dict of OplogThread/timestamp pairs to record progress
        self.oplog_progress = LockingDict()

        try:
            if target_url is None:
                if doc_manager is None:  # imported using from... import
                    self.doc_manager = DocManager(unique_key=u_key)
                else:  # imported using load source
                    self.doc_manager = doc_manager.DocManager(unique_key=u_key)
            else:
                if doc_manager is None:
                    self.doc_manager = DocManager(self.target_url,
                                                  unique_key=u_key)
                else:
                    self.doc_manager = doc_manager.DocManager(self.target_url,
                                                              unique_key=u_key)
        except errors.ConnectionFailed:
            err_msg = "MongoConnector: Could not connect to target system"
            logging.critical(err_msg)
            self.can_run = False
            return

        if self.oplog_checkpoint is not None:
            if not os.path.exists(self.oplog_checkpoint):
                info_str = ("MongoConnector: Can't find %s, "
                            "attempting to create an empty progress log" %
                            self.oplog_checkpoint)
                logging.info(info_str)
                try:
                    # Create oplog progress file
                    open(self.oplog_checkpoint, "w").close()
                except IOError as e:
                    logging.critical("MongoConnector: Could not "
                                     "create a progress log: %s" %
                                     str(e))
                    sys.exit(1)
            else:
                if (not os.access(self.oplog_checkpoint, os.W_OK)
                        and not os.access(self.oplog_checkpoint, os.R_OK )):
                    logging.critical("Invalid permissions on %s! Exiting" %
                        (self.oplog_checkpoint))
                    sys.exit(1)

    def join(self):
        """ Joins thread, stops it from running
        """
        self.can_run = False
        self.doc_manager.stop()
        threading.Thread.join(self)

    def write_oplog_progress(self):
        """ Writes oplog progress to file provided by user
        """

        if self.oplog_checkpoint is None:
            return None

        # write to temp file
        backup_file = self.oplog_checkpoint + '.backup'
        os.rename(self.oplog_checkpoint, backup_file)

        # for each of the threads write to file
        with open(self.oplog_checkpoint, 'w') as dest:
            with self.oplog_progress as oplog_prog:

                oplog_dict = oplog_prog.get_dict()
                for oplog, time_stamp in oplog_dict.items():
                    oplog_str = str(oplog)
                    timestamp = util.bson_ts_to_long(time_stamp)
                    json_str = json.dumps([oplog_str, timestamp])
                    try:
                        dest.write(json_str)
                    except IOError:
                        # Basically wipe the file, copy from backup
                        dest.truncate()
                        with open(backup_file, 'r') as backup:
                            shutil.copyfile(backup, dest)
                        break

        os.remove(self.oplog_checkpoint + '.backup')

    def read_oplog_progress(self):
        """Reads oplog progress from file provided by user.
        This method is only called once before any threads are spanwed.
        """

        if self.oplog_checkpoint is None:
            return None

        # Check for empty file
        try:
            if os.stat(self.oplog_checkpoint).st_size == 0:
                logging.info("MongoConnector: Empty oplog progress file.")
                return None
        except OSError:
            return None

        source = open(self.oplog_checkpoint, 'r')
        try:
            data = json.load(source)
        except ValueError:       # empty file
            reason = "It may be empty or corrupt."
            logging.info("MongoConnector: Can't read oplog progress file. %s" %
                (reason))
            source.close()
            return None

        source.close()

        count = 0
        oplog_dict = self.oplog_progress.get_dict()
        for count in range(0, len(data), 2):
            oplog_str = data[count]
            time_stamp = data[count + 1]
            oplog_dict[oplog_str] = util.long_to_bson_ts(time_stamp)
            #stored as bson_ts

    def run(self):
        """Discovers the mongo cluster and creates a thread for each primary.
        """
        main_conn = Connection(self.address)
        if self.auth_key is not None:
            main_conn['admin'].authenticate(self.auth_username, self.auth_key)
        self.read_oplog_progress()
        conn_type = None

        try:
            main_conn.admin.command("isdbgrid")
        except pymongo.errors.OperationFailure:
            conn_type = "REPLSET"

        if conn_type == "REPLSET":
            #non sharded configuration
            oplog_coll = main_conn['local']['oplog.rs']

            prim_admin = main_conn.admin
            repl_set = prim_admin.command("replSetGetStatus")['set']

            oplog = oplog_manager.OplogThread(main_conn,
                (main_conn.host + ":" + str(main_conn.port)),
                oplog_coll,
                False, self.doc_manager,
                self.oplog_progress,
                self.ns_set, self.auth_key,
                self.auth_username,
                repl_set=repl_set)
            self.shard_set[0] = oplog
            logging.info('MongoConnector: Starting connection thread %s' %
                         main_conn)
            oplog.start()

            while self.can_run:
                if not self.shard_set[0].running:
                    logging.error("MongoConnector: OplogThread"
                        " %s unexpectedly stopped! Shutting down" %
                        (str(self.shard_set[0])))
                    self.oplog_thread_join()
                    self.doc_manager.stop()
                    return

                self.write_oplog_progress()
                time.sleep(1)

        else:       # sharded cluster
            while self.can_run is True:

                for shard_doc in main_conn['config']['shards'].find():
                    shard_id = shard_doc['_id']
                    if shard_id in self.shard_set:
                        if not self.shard_set[shard_id].running:
                            logging.error("MongoConnector: OplogThread"
                                " %s unexpectedly stopped! Shutting down" %
                                (str(self.shard_set[shard_id])))
                            self.oplog_thread_join()
                            self.doc_manager.stop()
                            return

                        self.write_oplog_progress()
                        time.sleep(1)
                        continue
                    try:
                        repl_set, hosts = shard_doc['host'].split('/')
                    except ValueError:
                        cause = "The system only uses replica sets!"
                        logging.error("MongoConnector: %s", cause)
                        self.oplog_thread_join()
                        self.doc_manager.stop()
                        return

                    shard_conn = Connection(hosts, replicaset=repl_set)
                    oplog_coll = shard_conn['local']['oplog.rs']
                    oplog = oplog_manager.OplogThread(shard_conn, self.address,
                                                      oplog_coll, True,
                                                      self.doc_manager,
                                                      self.oplog_progress,
                                                      self.ns_set,
                                                      self.auth_key,
                                                      self.auth_username)
                    self.shard_set[shard_id] = oplog
                    msg = "Starting connection thread"
                    logging.info("MongoConnector: %s %s" % (msg, shard_conn))
                    oplog.start()

        self.oplog_thread_join()
        self.write_oplog_progress()

    def oplog_thread_join(self):
        """Stops all the OplogThreads
        """
        logging.info('MongoConnector: Stopping all OplogThreads')
        for thread in self.shard_set.values():
            thread.join()