Ejemplo n.º 1
0
 def find_primary(self, force=False, quiet=False):
     """Locate the replica set's PRIMARY member and cache it on the instance.

     The replica set status is only inspected when ``force`` is true or no
     primary is cached yet; otherwise the cached entry is returned as-is.

     Args:
         force: Re-inspect the replica set status even if a primary is
             already cached. Also passed through to ``get_rs_status``.
         quiet: Passed through to ``get_rs_status``. When true, the
             "Found PRIMARY" message is only logged if nothing was cached.

     Returns:
         dict: ``{'uri': <MongoUri>, 'optime': <optime>}`` for the primary.

     Raises:
         OperationError: If the inspected status lists no healthy PRIMARY.
     """
     if force or not self.primary:
         rs_status = self.get_rs_status(force, quiet)
         rs_name = rs_status['set']
         found = False
         for member in rs_status['members']:
             if member['stateStr'] != 'PRIMARY' or not member['health'] > 0:
                 continue
             member_uri = MongoUri(member['name'], 27017, rs_name)
             # 'optime' is either the value itself or a document holding
             # the value under 'ts'.
             optime_ts = member['optime']
             if isinstance(optime_ts, dict) and 'ts' in optime_ts:
                 optime_ts = optime_ts['ts']
             if not quiet or not self.primary:
                 logging.info("Found PRIMARY: %s with optime %s" %
                              (member_uri, str(optime_ts)))
             self.primary = {'uri': member_uri, 'optime': optime_ts}
             self.replset_summary['primary'] = {
                 "member": member,
                 "uri": member_uri.str()
             }
             found = True
         if not found:
             # Judge by what this scan found, not by the cache: on a forced
             # refresh a previously cached primary would otherwise be
             # returned even though it is no longer PRIMARY.
             self.primary = None
             message = (
                 "Unable to locate a PRIMARY member for replset %s, giving up"
                 % rs_name)
             logging.error(message)
             raise OperationError(message)
     return self.primary
Ejemplo n.º 2
0
    def run(self):
        """Run mongodump against one secondary per replica set, then the config server.

        One dump thread is created per entry in ``self.replsets``, all of them
        are started, and ``self.wait()`` is called. If ``self.sharding`` reports
        a config server described by a plain dict, one more dump thread is run
        for it and joined before returning.

        Returns:
            The ``self._summary`` attribute.

        Raises:
            OperationError: If no dump thread was created.
        """
        self.timer.start(self.timer_name)

        def new_dump_thread(state, uri):
            # All dump threads share the same credentials and dump options.
            return MongodumpThread(
                state,
                uri,
                self.timer,
                self.user,
                self.password,
                self.authdb,
                self.backup_dir,
                self.binary,
                self.threads(),
                self.do_gzip,
                self.verbose
            )

        # backup a secondary from each shard:
        for shard_name in self.replsets:
            target_uri = self.replsets[shard_name].find_secondary()['uri']
            self.states[shard_name] = OplogState(self.manager, target_uri)
            self.dump_threads.append(
                new_dump_thread(self.states[shard_name], target_uri)
            )

        if len(self.dump_threads) < 1:
            raise OperationError('No backup threads started!')

        logging.info(
            "Starting backups using mongodump %s (options: compression=%s, threads_per_dump=%i)" % (self.version, self.compression(), self.threads()))
        for dump_thread in self.dump_threads:
            dump_thread.start()
        self.wait()

        # backup a single sccc/non-replset config server, if exists:
        config_server = self.sharding.get_config_server() if self.sharding else None
        if config_server and isinstance(config_server, dict):
            logging.info("Using non-replset backup method for config server mongodump")
            config_uri = MongoUri(config_server['host'], 27019, 'configsvr')
            self.states['configsvr'] = OplogState(self.manager, config_uri)
            config_thread = new_dump_thread(self.states['configsvr'], config_uri)
            self.dump_threads = [config_thread]
            config_thread.start()
            config_thread.join()

        self.completed = True
        return self._summary
    def get_replsets(self, force=False):
        """Populate and return ``self.replsets``, keyed by replica set name.

        A ``Replset`` is created for every shard reported by ``self.sharding``
        whose replica set name is not registered yet (or for every shard when
        ``force`` is true). A config server that is itself a ``Replset`` is
        registered under its own replica set name.

        Args:
            force: Recreate the ``Replset`` entry even if one already exists.

        Returns:
            The ``self.replsets`` mapping.
        """
        for shard_doc in self.sharding.shards():
            uri = MongoUri(shard_doc['host'])
            if not force and uri.replset in self.replsets:
                # already registered and no refresh requested
                continue
            connection = self.get_replset_connection(uri)
            self.replsets[uri.replset] = Replset(self.config, connection)

        config_server = self.sharding.get_config_server()
        if config_server and isinstance(config_server, Replset):
            name = config_server.get_rs_name()
            self.replsets[name] = config_server

        return self.replsets
 def find_primary(self, force=False, quiet=False):
     """Locate the replica set's PRIMARY member and cache it on the instance.

     The replica set status is only inspected when ``force`` is true or no
     primary is cached yet; otherwise the cached entry is returned as-is.

     Args:
         force: Re-inspect the replica set status even if a primary is
             already cached. Also passed through to ``get_rs_status``.
         quiet: Passed through to ``get_rs_status``. When true, the
             "Found PRIMARY" message is only logged if nothing was cached.

     Returns:
         dict: ``{'uri': <MongoUri>, 'optime': <optime>}`` for the primary.

     Raises:
         OperationError: If the inspected status lists no healthy member in
             the ``self.state_primary`` state.
     """
     if force or not self.primary:
         rs_status = self.get_rs_status(force, quiet)
         rs_name = rs_status['set']
         found = False
         for member in rs_status['members']:
             if member['state'] != self.state_primary or not member['health'] > 0:
                 continue
             member_uri = MongoUri(member['name'], 27017, rs_name)
             # 'optime' is either the value itself or a document holding
             # the value under 'ts'.
             optime_ts = member['optime']
             if isinstance(optime_ts, dict) and 'ts' in optime_ts:
                 optime_ts = optime_ts['ts']
             if not quiet or not self.primary:
                 logging.info("Found PRIMARY: %s with optime %s" % (
                     member_uri,
                     str(optime_ts)
                 ))
             self.primary = {
                 'uri': member_uri,
                 'optime': optime_ts
             }
             self.replset_summary['primary'] = {"member": member, "uri": member_uri.str()}
             found = True
         if not found:
             # Judge by what this scan found, not by the cache: on a forced
             # refresh a previously cached primary would otherwise be
             # returned even though it is no longer PRIMARY.
             self.primary = None
             message = "Unable to locate a PRIMARY member for replset %s, giving up" % rs_name
             logging.error(message)
             raise OperationError(message)
     return self.primary
Ejemplo n.º 5
0
    def stop(self, kill=False, sleep_secs=3):
        """Stop every oplog tailer thread and record its final state.

        Does nothing when ``self.enabled()`` is false. For each shard the
        tailer is (unless ``kill`` is set) given time to reach the primary's
        current optime, then its stop event is set and the method blocks
        until the thread is no longer alive. The tailer's state is copied
        into ``self._summary[shard]``.

        Args:
            kill: Skip waiting for the tailer to catch up and call
                ``terminate()`` on the thread after setting its stop event.
            sleep_secs: Seconds to sleep between polls while waiting.

        Raises:
            OperationError: If the tailer's stored URI cannot be parsed.
        """
        if not self.enabled():
            return
        logging.info("Stopping all oplog tailers")
        for shard in self.shards:
            replset = self.replsets[shard]
            state = self.shards[shard]['state']
            thread = self.shards[shard]['thread']

            try:
                uri = MongoUri(state.get('uri'))
            except Exception as e:  # 'as' form is valid on Python 2.6+ and 3
                raise OperationError(e)

            if not kill:
                # get current optime of replset primary to use a stop position
                try:
                    timestamp = replset.primary_optime(True, True)
                except Exception:
                    logging.warning(
                        "Could not get current optime from PRIMARY! Using now as a stop time"
                    )
                    timestamp = Timestamp(int(time()), 0)

                # wait for replication to get in sync making sure cursor has not been stopped in a race condition
                while True:
                    # Read the shared value once per iteration so the
                    # emptiness test, the comparison and the log line all
                    # see the same timestamp.
                    last_ts = state.get('last_ts')
                    if not last_ts or not (last_ts < timestamp):
                        break
                    if self.shards[shard]['thread'].stopped:
                        break
                    logging.info(
                        'Waiting for %s tailer to reach ts: %s, currrent: %s' %
                        (uri, timestamp, last_ts))
                    sleep(sleep_secs)

            # set thread stop event
            self.shards[shard]['stop'].set()
            if kill:
                thread.terminate()
            sleep(1)

            # wait for thread to stop
            while thread.is_alive():
                logging.info('Waiting for tailer %s to stop' % uri)
                sleep(sleep_secs)

            # gather state info
            self._summary[shard] = state.get().copy()
Ejemplo n.º 6
0
    def find_secondary(self, force=False, quiet=False):
        """Score the replica set's SECONDARY members and pick the best one.

        A cached choice is returned unless ``force`` is true. Otherwise every
        member of the replica set status is examined: electable members are
        counted, arbiters and members in a state above SECONDARY are logged
        and skipped, and each healthy SECONDARY is scored from its hidden
        flag, priority, preferred-member status and replication lag. The
        highest-scoring member whose lag is below ``self.max_lag_secs`` is
        stored in ``self.secondary``.

        Args:
            force: Passed to the status/config getters; also bypasses the
                cached choice.
            quiet: Passed to the status/config getters.

        Returns:
            dict: The chosen secondary with keys ``replSet``, ``uri``,
            ``optime`` and ``score``.

        Raises:
            OperationError: If the chosen secondary has a score of zero, if
                no secondary qualified, or if fewer electable members than
                the quorum were seen.
        """
        rs_status = self.get_rs_status(force, quiet)

        # return values are not used here
        self.get_rs_config(force, quiet)
        self.get_mongo_config(force, quiet)

        quorum = self.get_rs_quorum()
        rs_name = rs_status['set']

        if self.secondary and not force:
            return self.secondary

        electable = 0
        for member in rs_status['members']:
            uri = MongoUri(member['name'], 27017, rs_name)
            cfg = self.get_rs_config_member(member)

            if self.is_member_electable(cfg):
                electable += 1

            if member['state'] == self.state_arbiter:
                logging.info("Found ARBITER %s, skipping" % uri)
                continue
            if member['state'] > self.state_secondary:
                logging.warning("Found down or unhealthy SECONDARY %s with state: %s" % (uri, member['stateStr']))
                continue
            if member['state'] != self.state_secondary or not member['health'] > 0:
                continue

            # Healthy SECONDARY from here on.
            details = {}
            score = self.max_lag_secs * 10
            score_scale = 100.00 / float(score)

            if self.read_pref_tags and not self.has_read_pref_tags(cfg):
                logging.info("Found SECONDARY %s without read preference tags: %s, skipping" % (
                    uri,
                    parse_read_pref_tags(self.read_pref_tags)
                ))
                continue

            if 'hidden' in cfg and cfg['hidden']:
                score += (score * self.hidden_weight)
                details['hidden'] = True

            if 'priority' in cfg:
                priority = int(cfg['priority'])
                details['priority'] = priority
                if cfg['priority'] > 1:
                    score -= priority - 1
                elif cfg['priority'] == 0:
                    score += (score * self.pri0_weight)
                if not (self.min_priority <= priority <= self.max_priority):
                    logging.info("Found SECONDARY %s with out-of-bounds priority! Skipping" % uri)
                    continue
            elif self.hidden_only and 'hidden' not in details:
                logging.info("Found SECONDARY %s that is non-hidden and hidden-only mode is enabled! Skipping" % uri)
                continue

            if uri.str() in self.preferred_members:
                logging.info("Bumping preferred SECONDARY member %s's score", uri)
                score = 10000

            lag, optime_ts = self.get_repl_lag(member)
            score = ceil((score - lag) * score_scale)
            if lag < self.max_lag_secs:
                headline = "Found SECONDARY %s" % uri
                if self.secondary is None or score > self.secondary['score']:
                    self.secondary = {
                        'replSet': rs_name,
                        'uri': uri,
                        'optime': optime_ts,
                        'score': score
                    }
            else:
                headline = "Found SECONDARY %s with too high replication lag! Skipping" % uri

            if self.secondary is not None and self.secondary['score'] == 0:
                logging.error("Chosen SECONDARY %s has a score of zero/0! This is unexpected, exiting" % uri)
                raise OperationError("Chosen SECONDARY %s has a score of zero/0!" % uri)

            if 'configsvr' in rs_status and rs_status['configsvr']:
                details['configsvr'] = True

            details['lag'] = lag
            details['optime'] = optime_ts
            details['score'] = int(score)
            logging.info("%s: %s" % (headline, str(details)))
            self.replset_summary['secondary'] = {"member": member, "uri": uri.str(), "data": details}

        if self.secondary is None or electable < quorum:
            logging.error("Not enough valid secondaries in replset %s to take backup! Num replset electable members: %i, required quorum: %i" % (
                rs_name,
                electable,
                quorum
            ))
            raise OperationError("Not enough secondaries in replset %s to safely take backup!" % rs_name)

        logging.info("Choosing SECONDARY %s for replica set %s (score: %i)" % (self.secondary['uri'], rs_name, self.secondary['score']))
        return self.secondary
Ejemplo n.º 7
0
    def find_secondary(self, force=False, quiet=False):
        """Score the replica set's SECONDARY members and pick the best one.

        A cached choice is returned unless ``force`` is true. Otherwise every
        member of the replica set status is examined; each healthy SECONDARY
        is scored from its hidden flag, priority and replication lag, and the
        highest-scoring member whose lag is below ``self.max_lag_secs`` is
        stored in ``self.secondary``.

        Args:
            force: Passed to the status/config getters; also bypasses the
                cached choice.
            quiet: Passed to the status/config getters.

        Returns:
            dict: The chosen secondary with keys ``replSet``, ``count``
            (number of secondaries that passed the lag check), ``uri``,
            ``optime`` and ``score``.

        Raises:
            OperationError: If no secondary qualified, or if the usable
                secondaries plus one are fewer than the quorum (half of the
                member count, rounded up).
        """
        rs_status = self.get_rs_status(force, quiet)
        # Return values are not needed here.
        # NOTE(review): presumably these calls refresh cached config that
        # get_rs_config_member() reads - confirm.
        self.get_rs_config(force, quiet)
        self.get_mongo_config(force, quiet)
        rs_name = rs_status['set']
        quorum = ceil(len(rs_status['members']) / 2.0)

        if self.secondary and not force:
            return self.secondary

        # Every secondary that passes the lag check is counted, not only the
        # ones that beat the current best score; otherwise the quorum check
        # below would depend on the order in which members are listed.
        usable_count = 0
        for member in rs_status['members']:
            member_uri = MongoUri(member['name'], 27017, rs_name)
            # Member state codes as used by the log messages: 7 is ARBITER,
            # 2 is SECONDARY, anything above 2 is treated as down/unhealthy.
            if member['state'] == 7:
                logging.info("Found ARBITER %s, skipping" % member_uri)
            elif member['state'] > 2:
                logging.warning("Found down or unhealthy SECONDARY %s with state: %s" % (member_uri, member['stateStr']))
            elif member['state'] == 2 and member['health'] > 0:
                log_data = {}
                score = self.max_lag_secs * 10
                # Float division: with integer operands Python 2 would
                # truncate 100 / score to 0 for any score above 100, which
                # turns every member's score into zero.
                score_scale = 100.0 / score
                member_config = self.get_rs_config_member(member)
                if 'hidden' in member_config and member_config['hidden']:
                    score += (score * self.hidden_weight)
                    log_data['hidden'] = True
                if 'priority' in member_config:
                    priority = int(member_config['priority'])
                    log_data['priority'] = priority
                    if member_config['priority'] > 1:
                        score -= priority - 1
                    elif member_config['priority'] == 0:
                        score += (score * self.pri0_weight)
                    if priority < self.min_priority or priority > self.max_priority:
                        logging.info("Found SECONDARY %s with out-of-bounds priority! Skipping" % member_uri)
                        continue
                elif self.hidden_only and 'hidden' not in log_data:
                    logging.info("Found SECONDARY %s that is non-hidden and hidden-only mode is enabled! Skipping" % member_uri)
                    continue

                rep_lag, optime_ts = self.get_repl_lag(member)
                score = ceil((score - rep_lag) * score_scale)
                if rep_lag < self.max_lag_secs:
                    usable_count += 1
                    if self.secondary is None or score > self.secondary['score']:
                        self.secondary = {
                            'replSet': rs_name,
                            'count': usable_count,
                            'uri': member_uri,
                            'optime': optime_ts,
                            'score': score
                        }
                    log_msg = "Found SECONDARY %s" % member_uri
                else:
                    log_msg = "Found SECONDARY %s with too high replication lag! Skipping" % member_uri

                if 'configsvr' in rs_status and rs_status['configsvr']:
                    log_data['configsvr'] = True

                log_data['lag'] = rep_lag
                log_data['optime'] = optime_ts
                log_data['score'] = int(score)
                logging.info("%s: %s" % (log_msg, str(log_data)))
                self.replset_summary['secondary'] = {"member": member, "uri": member_uri.str(), "data": log_data}

        if self.secondary is not None:
            # Publish the final tally on the chosen entry.
            self.secondary['count'] = usable_count

        if self.secondary is None or (self.secondary['count'] + 1) < quorum:
            secondary_count = self.secondary['count'] + 1 if self.secondary else 0
            logging.error("Not enough valid secondaries in replset %s to take backup! Num replset members: %i, required quorum: %i" % (
                rs_name,
                secondary_count,
                quorum
            ))
            raise OperationError("Not enough secondaries in replset %s to safely take backup!" % rs_name)

        logging.info("Choosing SECONDARY %s for replica set %s (score: %i)" % (self.secondary['uri'], rs_name, self.secondary['score']))
        return self.secondary