def find_primary(self, force=False, quiet=False):
    """Return the PRIMARY member of this replica set, rescanning when needed.

    The replica set status is only consulted when ``force`` is true or no
    primary is cached in ``self.primary``. Every member whose ``stateStr`` is
    ``'PRIMARY'`` and whose ``health`` is above zero replaces the cache and
    ``self.replset_summary['primary']``; if several match, the last one
    listed wins.

    Args:
        force: Rescan even when a primary is already cached; also passed
            through to ``get_rs_status``.
        quiet: Passed through to ``get_rs_status``. The discovered primary is
            logged when this is ``False`` or when nothing was cached yet.

    Returns:
        dict: ``{'uri': <MongoUri>, 'optime': <optime>}`` where the optime is
        ``member['optime']['ts']`` when the member reports a dict containing
        ``'ts'``, otherwise ``member['optime']`` itself.

    Raises:
        OperationError: The scan did not find a healthy PRIMARY member.
    """
    if force or not self.primary:
        rs_status = self.get_rs_status(force, quiet)
        rs_name = rs_status['set']
        located = None
        for member in rs_status['members']:
            if member['stateStr'] == 'PRIMARY' and member['health'] > 0:
                # NOTE(review): 27017 is presumably MongoUri's fallback port
                # when the member name carries none — confirm against MongoUri.
                member_uri = MongoUri(member['name'], 27017, rs_name)
                optime_ts = member['optime']
                if isinstance(optime_ts, dict) and 'ts' in optime_ts:
                    optime_ts = optime_ts['ts']
                if quiet is False or not self.primary:
                    logging.info("Found PRIMARY: %s with optime %s" % (member_uri, str(optime_ts)))
                located = {'uri': member_uri, 'optime': optime_ts}
                self.primary = located
                self.replset_summary['primary'] = {"member": member, "uri": member_uri.str()}
        if located is None:
            # Judge the scan by what *it* found, not by the cache: on a forced
            # rescan a previously cached primary must not be handed back as if
            # it were current. Drop it so later calls rescan as well.
            self.primary = None
            message = "Unable to locate a PRIMARY member for replset %s, giving up" % rs_name
            logging.error(message)
            raise OperationError(message)
    return self.primary
def run(self):
    """Dump one secondary per replica set, then any non-replset config server.

    Starts ``self.timer``, prepares one ``MongodumpThread`` (each with its own
    ``OplogState``) for every entry in ``self.replsets``, starts them all and
    blocks in ``self.wait()``. Afterwards, when ``self.sharding`` is set and
    its config server is described by a plain dict, that server is dumped on
    a single thread which is joined before returning.

    Returns:
        The ``self._summary`` attribute.

    Raises:
        OperationError: ``self.dump_threads`` is empty after the setup loop.
    """
    self.timer.start(self.timer_name)

    # backup a secondary from each shard:
    for shard_name in self.replsets:
        secondary_uri = self.replsets[shard_name].find_secondary()['uri']
        self.states[shard_name] = OplogState(self.manager, secondary_uri)
        dump_thread = MongodumpThread(
            self.states[shard_name],
            secondary_uri,
            self.timer,
            self.user,
            self.password,
            self.authdb,
            self.backup_dir,
            self.binary,
            self.threads(),
            self.do_gzip,
            self.verbose
        )
        self.dump_threads.append(dump_thread)

    if len(self.dump_threads) < 1:
        raise OperationError('No backup threads started!')

    start_details = (self.version, self.compression(), self.threads())
    logging.info(
        "Starting backups using mongodump %s (options: compression=%s, threads_per_dump=%i)" % start_details
    )
    for dump_thread in self.dump_threads:
        dump_thread.start()
    self.wait()

    # backup a single sccc/non-replset config server, if exists:
    config_server = self.sharding.get_config_server() if self.sharding else None
    if config_server and isinstance(config_server, dict):
        logging.info("Using non-replset backup method for config server mongodump")
        config_uri = MongoUri(config_server['host'], 27019, 'configsvr')
        self.states['configsvr'] = OplogState(self.manager, config_uri)
        # The per-shard threads have finished by now; the list is replaced
        # with the lone config-server dump thread.
        self.dump_threads = [
            MongodumpThread(
                self.states['configsvr'],
                config_uri,
                self.timer,
                self.user,
                self.password,
                self.authdb,
                self.backup_dir,
                self.binary,
                self.threads(),
                self.do_gzip,
                self.verbose
            )
        ]
        config_thread = self.dump_threads[0]
        config_thread.start()
        config_thread.join()

    self.completed = True
    return self._summary
def get_replsets(self, force=False):
    """Populate and return ``self.replsets``, keyed by replica set name.

    Each shard reported by ``self.sharding.shards()`` gets a ``Replset``
    built on a fresh connection, unless an entry under its replica set name
    already exists and ``force`` is false. When the config server returned by
    ``self.sharding.get_config_server()`` is itself a ``Replset``, it is
    stored under the name it reports via ``get_rs_name()``.

    Args:
        force: Rebuild the ``Replset`` for every shard even if one is
            already registered.

    Returns:
        The ``self.replsets`` mapping.
    """
    for shard_doc in self.sharding.shards():
        uri = MongoUri(shard_doc['host'])
        if not force and uri.replset in self.replsets:
            # Already registered and no rebuild requested.
            continue
        connection = self.get_replset_connection(uri)
        self.replsets[uri.replset] = Replset(self.config, connection)

    config_server = self.sharding.get_config_server()
    if config_server and isinstance(config_server, Replset):
        self.replsets[config_server.get_rs_name()] = config_server

    return self.replsets
def find_primary(self, force=False, quiet=False):
    """Return the PRIMARY member of this replica set, rescanning when needed.

    The replica set status is only consulted when ``force`` is true or no
    primary is cached in ``self.primary``. Every member whose ``state``
    equals ``self.state_primary`` and whose ``health`` is above zero replaces
    the cache and ``self.replset_summary['primary']``; if several match, the
    last one listed wins.

    Args:
        force: Rescan even when a primary is already cached; also passed
            through to ``get_rs_status``.
        quiet: Passed through to ``get_rs_status``. The discovered primary is
            logged when this is ``False`` or when nothing was cached yet.

    Returns:
        dict: ``{'uri': <MongoUri>, 'optime': <optime>}`` where the optime is
        ``member['optime']['ts']`` when the member reports a dict containing
        ``'ts'``, otherwise ``member['optime']`` itself.

    Raises:
        OperationError: The scan did not find a healthy PRIMARY member.
    """
    if force or not self.primary:
        rs_status = self.get_rs_status(force, quiet)
        rs_name = rs_status['set']
        located = None
        for member in rs_status['members']:
            if member['state'] == self.state_primary and member['health'] > 0:
                # NOTE(review): 27017 is presumably MongoUri's fallback port
                # when the member name carries none — confirm against MongoUri.
                member_uri = MongoUri(member['name'], 27017, rs_name)
                optime_ts = member['optime']
                if isinstance(optime_ts, dict) and 'ts' in optime_ts:
                    optime_ts = optime_ts['ts']
                if quiet is False or not self.primary:
                    logging.info("Found PRIMARY: %s with optime %s" % (member_uri, str(optime_ts)))
                located = {'uri': member_uri, 'optime': optime_ts}
                self.primary = located
                self.replset_summary['primary'] = {"member": member, "uri": member_uri.str()}
        if located is None:
            # Judge the scan by what *it* found, not by the cache: on a forced
            # rescan a previously cached primary must not be handed back as if
            # it were current. Drop it so later calls rescan as well.
            self.primary = None
            message = "Unable to locate a PRIMARY member for replset %s, giving up" % rs_name
            logging.error(message)
            raise OperationError(message)
    return self.primary
def stop(self, kill=False, sleep_secs=3):
    """Stop every oplog tailer thread and record its final state.

    Does nothing when ``self.enabled()`` is false. Otherwise, for each shard
    in ``self.shards``: unless ``kill`` is set, wait until the tailer's
    ``last_ts`` has reached the replica set primary's current optime; then
    set the shard's stop event, wait for the thread to exit and copy the
    tailer state into ``self._summary[shard]``.

    Args:
        kill: Skip the catch-up wait and call ``terminate()`` on each thread
            after setting its stop event.
        sleep_secs: Seconds to sleep between polls while waiting.

    Raises:
        OperationError: A ``MongoUri`` could not be built from the tailer
            state's ``'uri'`` value.
    """
    if not self.enabled():
        return
    logging.info("Stopping all oplog tailers")
    for shard in self.shards:
        replset = self.replsets[shard]
        state = self.shards[shard]['state']
        thread = self.shards[shard]['thread']

        try:
            uri = MongoUri(state.get('uri'))
        except Exception as e:
            raise OperationError(e)

        if not kill:
            # get current optime of replset primary to use a stop position
            try:
                timestamp = replset.primary_optime(True, True)
            except Exception:
                # Deliberate best-effort: fall back to wall-clock time rather
                # than failing the shutdown.
                logging.warning("Could not get current optime from PRIMARY! Using now as a stop time")
                timestamp = Timestamp(int(time()), 0)

            # wait for replication to get in sync making sure cursor has not been stopped in a race condition
            while state.get('last_ts') and state.get('last_ts') < timestamp and not self.shards[shard]['thread'].stopped:
                logging.info('Waiting for %s tailer to reach ts: %s, current: %s' % (uri, timestamp, state.get('last_ts')))
                sleep(sleep_secs)

        # set thread stop event
        self.shards[shard]['stop'].set()
        if kill:
            thread.terminate()
        # Short fixed pause before polling; presumably gives the thread a
        # moment to observe the stop event — TODO confirm.
        sleep(1)

        # wait for thread to stop
        while thread.is_alive():
            logging.info('Waiting for tailer %s to stop' % uri)
            sleep(sleep_secs)

        # gather state info
        self._summary[shard] = state.get().copy()
def find_secondary(self, force=False, quiet=False):
    """Pick the best-scoring healthy SECONDARY of this replica set for backup.

    Every member of the replica set status is examined. Arbiters and members
    in a state above ``self.state_secondary`` are only logged. A healthy
    secondary is scored, and the highest-scoring one whose replication lag is
    below ``self.max_lag_secs`` is stored in ``self.secondary``.

    Args:
        force: Re-evaluate members even when a secondary is already cached;
            also passed through to the status/config getters.
        quiet: Passed through to the status/config getters.

    Returns:
        dict: ``self.secondary`` with keys ``'replSet'``, ``'uri'``,
        ``'optime'`` and ``'score'``.

    Raises:
        OperationError: The chosen secondary has a score of 0, no secondary
            qualified, or fewer electable members than ``get_rs_quorum()``
            were seen.
    """
    rs_status = self.get_rs_status(force, quiet)
    # Return values are unused here; presumably these calls refresh cached
    # config consulted by the helpers below — TODO confirm.
    self.get_rs_config(force, quiet)
    self.get_mongo_config(force, quiet)
    quorum = self.get_rs_quorum()
    rs_name = rs_status['set']

    if self.secondary and not force:
        return self.secondary

    # NOTE(review): with force=True a previously cached self.secondary is not
    # cleared, so new candidates must beat its old score — confirm intended.
    electable_count = 0
    for member in rs_status['members']:
        member_uri = MongoUri(member['name'], 27017, rs_name)
        member_config = self.get_rs_config_member(member)
        # Electable members are counted regardless of their current state.
        if self.is_member_electable(member_config):
            electable_count += 1
        if member['state'] == self.state_arbiter:
            logging.info("Found ARBITER %s, skipping" % member_uri)
        elif member['state'] > self.state_secondary:
            logging.warning("Found down or unhealthy SECONDARY %s with state: %s" % (member_uri, member['stateStr']))
        elif member['state'] == self.state_secondary and member['health'] > 0:
            log_data = {}
            # Baseline score is ten times the allowed lag; score_scale maps
            # that baseline onto 100.
            score = self.max_lag_secs * 10
            score_scale = 100.00 / float(score)
            priority = 0

            if self.read_pref_tags and not self.has_read_pref_tags(member_config):
                logging.info("Found SECONDARY %s without read preference tags: %s, skipping" % (
                    member_uri,
                    parse_read_pref_tags(self.read_pref_tags)
                ))
                continue

            # Hidden members get a bonus proportional to hidden_weight.
            if 'hidden' in member_config and member_config['hidden']:
                score += (score * self.hidden_weight)
                log_data['hidden'] = True
            if 'priority' in member_config:
                priority = int(member_config['priority'])
                log_data['priority'] = priority
                # Priority above 1 costs (priority - 1) points; priority 0
                # earns a bonus proportional to pri0_weight.
                if member_config['priority'] > 1:
                    score -= priority - 1
                elif member_config['priority'] == 0:
                    score += (score * self.pri0_weight)
                # NOTE(review): these skip checks sit under the 'priority'
                # branch, so a member config without that key bypasses them —
                # confirm intended.
                if priority < self.min_priority or priority > self.max_priority:
                    logging.info("Found SECONDARY %s with out-of-bounds priority! Skipping" % member_uri)
                    continue
                elif self.hidden_only and 'hidden' not in log_data:
                    logging.info("Found SECONDARY %s that is non-hidden and hidden-only mode is enabled! Skipping" % member_uri)
                    continue

            # A preferred member's score is replaced outright, discarding the
            # adjustments made above.
            if member_uri.str() in self.preferred_members:
                logging.info("Bumping preferred SECONDARY member %s's score", member_uri)
                score = 10000

            rep_lag, optime_ts = self.get_repl_lag(member)
            # Subtract the lag, then scale and round up.
            score = ceil((score - rep_lag) * score_scale)
            if rep_lag < self.max_lag_secs:
                # Strictly greater: on a tie the earlier candidate is kept.
                if self.secondary is None or score > self.secondary['score']:
                    self.secondary = {
                        'replSet': rs_name,
                        'uri': member_uri,
                        'optime': optime_ts,
                        'score': score
                    }
                log_msg = "Found SECONDARY %s" % member_uri
            else:
                log_msg = "Found SECONDARY %s with too high replication lag! Skipping" % member_uri

            # The messages name the member being examined, which is not
            # necessarily the one stored in self.secondary.
            if self.secondary is not None and self.secondary['score'] == 0:
                logging.error("Chosen SECONDARY %s has a score of zero/0! This is unexpected, exiting" % member_uri)
                raise OperationError("Chosen SECONDARY %s has a score of zero/0!" % member_uri)

            if 'configsvr' in rs_status and rs_status['configsvr']:
                log_data['configsvr'] = True

            log_data['lag'] = rep_lag
            log_data['optime'] = optime_ts
            log_data['score'] = int(score)
            logging.info("%s: %s" % (log_msg, str(log_data)))
            # NOTE(review): overwritten for every secondary that reaches this
            # point, so the summary holds the last one examined rather than
            # the one chosen — confirm intended.
            self.replset_summary['secondary'] = {"member": member, "uri": member_uri.str(), "data": log_data}

    if self.secondary is None or electable_count < quorum:
        logging.error("Not enough valid secondaries in replset %s to take backup! Num replset electable members: %i, required quorum: %i" % (
            rs_name,
            electable_count,
            quorum
        ))
        raise OperationError("Not enough secondaries in replset %s to safely take backup!" % rs_name)

    logging.info("Choosing SECONDARY %s for replica set %s (score: %i)" % (self.secondary['uri'], rs_name, self.secondary['score']))
    return self.secondary
def find_secondary(self, force=False, quiet=False):
    """Pick the best-scoring healthy SECONDARY of this replica set for backup.

    Every member of the replica set status is examined. Arbiters (state 7)
    and members in a state above 2 are only logged. A healthy secondary
    (state 2) is scored, and the highest-scoring one whose replication lag is
    below ``self.max_lag_secs`` is stored in ``self.secondary``.

    Args:
        force: Re-evaluate members even when a secondary is already cached;
            also passed through to the status/config getters.
        quiet: Passed through to the status/config getters.

    Returns:
        dict: ``self.secondary`` with keys ``'replSet'``, ``'count'``,
        ``'uri'``, ``'optime'`` and ``'score'``.

    Raises:
        OperationError: No secondary qualified, or the stored ``'count'``
            plus one is below half the member count (rounded up).
    """
    rs_status = self.get_rs_status(force, quiet)
    # Return values are unused here; presumably these calls refresh cached
    # config consulted by the helpers below — TODO confirm.
    self.get_rs_config(force, quiet)
    self.get_mongo_config(force, quiet)
    rs_name = rs_status['set']
    quorum = ceil(len(rs_status['members']) / 2.0)

    if self.secondary and not force:
        return self.secondary

    for member in rs_status['members']:
        member_uri = MongoUri(member['name'], 27017, rs_name)
        if member['state'] == 7:
            logging.info("Found ARBITER %s, skipping" % member_uri)
        elif member['state'] > 2:
            logging.warning("Found down or unhealthy SECONDARY %s with state: %s" % (member_uri, member['stateStr']))
        elif member['state'] == 2 and member['health'] > 0:
            log_data = {}
            # Baseline score is ten times the allowed lag; score_scale maps
            # that baseline onto 100.
            score = self.max_lag_secs * 10
            # Float division on purpose: under Python 2, ``100 / score`` on
            # ints truncates, giving 0 as soon as max_lag_secs exceeds 10 and
            # thereby zeroing every member's score.
            score_scale = 100.00 / float(score)
            priority = 0
            member_config = self.get_rs_config_member(member)

            # Hidden members get a bonus proportional to hidden_weight.
            if 'hidden' in member_config and member_config['hidden']:
                score += (score * self.hidden_weight)
                log_data['hidden'] = True
            if 'priority' in member_config:
                priority = int(member_config['priority'])
                log_data['priority'] = priority
                # Priority above 1 costs (priority - 1) points; priority 0
                # earns a bonus proportional to pri0_weight.
                if member_config['priority'] > 1:
                    score -= priority - 1
                elif member_config['priority'] == 0:
                    score += (score * self.pri0_weight)
                # NOTE(review): these skip checks sit under the 'priority'
                # branch, so a member config without that key bypasses them —
                # confirm intended.
                if priority < self.min_priority or priority > self.max_priority:
                    logging.info("Found SECONDARY %s with out-of-bounds priority! Skipping" % member_uri)
                    continue
                elif self.hidden_only and 'hidden' not in log_data:
                    logging.info("Found SECONDARY %s that is non-hidden and hidden-only mode is enabled! Skipping" % member_uri)
                    continue

            rep_lag, optime_ts = self.get_repl_lag(member)
            # Subtract the lag, then scale and round up.
            score = ceil((score - rep_lag) * score_scale)
            if rep_lag < self.max_lag_secs:
                # Strictly greater: on a tie the earlier candidate is kept.
                if self.secondary is None or score > self.secondary['score']:
                    # NOTE(review): 'count' only advances when a candidate
                    # replaces the current best, so it is not a tally of all
                    # in-lag secondaries — confirm against the quorum check
                    # at the end of this method.
                    self.secondary = {
                        'replSet': rs_name,
                        'count': 1 if self.secondary is None else self.secondary['count'] + 1,
                        'uri': member_uri,
                        'optime': optime_ts,
                        'score': score
                    }
                log_msg = "Found SECONDARY %s" % member_uri
            else:
                log_msg = "Found SECONDARY %s with too high replication lag! Skipping" % member_uri

            if 'configsvr' in rs_status and rs_status['configsvr']:
                log_data['configsvr'] = True

            log_data['lag'] = rep_lag
            log_data['optime'] = optime_ts
            log_data['score'] = int(score)
            logging.info("%s: %s" % (log_msg, str(log_data)))
            # Overwritten for every secondary that reaches this point.
            self.replset_summary['secondary'] = {"member": member, "uri": member_uri.str(), "data": log_data}

    if self.secondary is None or (self.secondary['count'] + 1) < quorum:
        secondary_count = self.secondary['count'] + 1 if self.secondary else 0
        logging.error("Not enough valid secondaries in replset %s to take backup! Num replset members: %i, required quorum: %i" % (
            rs_name,
            secondary_count,
            quorum
        ))
        raise OperationError("Not enough secondaries in replset %s to safely take backup!" % rs_name)

    logging.info("Choosing SECONDARY %s for replica set %s (score: %i)" % (self.secondary['uri'], rs_name, self.secondary['score']))
    return self.secondary