def process_build_info(self, build_info):
    """Populate self.msg and plugin state from a Jenkins build info dict.

    Validates that duration and timestamp are integers, converts duration to
    seconds, flags in-progress builds as UNKNOWN, non-SUCCESS results as
    CRITICAL, and appends duration, age and perfdata to self.msg.
    """
    display_name = build_info['displayName']
    duration = build_info['duration']
    if not isInt(duration):
        raise UnknownError('duration field returned non-integer! {0}'.format(support_msg_api()))
    duration = int(duration) / 1000
    result = build_info['result']
    timestamp = build_info['timestamp']
    if not isInt(timestamp):
        raise UnknownError('timestamp field returned non-integer! {0}'.format(support_msg_api()))
    timestamp = int(timestamp)
    building = build_info['building']
    self.msg += "build {build} status: ".format(build=display_name)
    if building:
        # still in progress - cannot assess result yet
        self.unknown()
        self.msg += 'STILL BUILDING!'
        return
    self.msg += result
    if result != 'SUCCESS':
        self.critical()
    self.msg += ', duration={duration} secs'.format(duration=duration)
    self.check_thresholds(duration)
    age = time.time() - (timestamp / 1000)
    self.msg += ', age={age} secs'.format(age=sec2human(age))
    if age < 0:
        # timestamp in the future implies clock skew
        self.warning()
        self.msg += ' (< 0!)'
    if self.age and age > self.age:
        self.critical()
        self.msg += ' (> {0:d})'.format(self.age)
    self.msg += ' | build_duration={duration}s{perf_thresholds}'.format(
        duration=duration,
        perf_thresholds=self.get_perf_thresholds())
def extract_value(self, content):
    """Extract and base64-decode the single 'Value' field from a Consul KV
    JSON response.

    Raises UnknownError if the response is not JSON, not a list, blank,
    contains more than one key, lacks a 'Value' field, or fails to decode.
    """
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not isList(json_data):
        raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        raise UnknownError("more than one key returned by consul! response = '%s'. %s" \
                           % (content, support_msg_api()))
    try:
        value = json_data[0]['Value']
    except KeyError:
        raise UnknownError("couldn't find field 'Value' in response from consul: '%s'. %s"
                           % (content, support_msg_api()))
    try:
        # base64.decodestring was deprecated and removed in Python 3.9 -
        # prefer decodebytes, falling back for Python 2.7 where
        # decodebytes doesn't exist
        b64decode = getattr(base64, 'decodebytes', None) or base64.decodestring
        value = b64decode(value)
    except TypeError:
        raise UnknownError("invalid data returned for key '{0}' value = '{1}', failed to base64 decode"
                           .format(self.key, value))
    return value
def extract_value(self, content):  # pylint: disable=no-self-use
    """Extract and base64-decode the single 'Value' field from a Consul KV
    JSON response.

    Raises UnknownError for non-JSON / non-list / blank / multi-key
    responses, a missing 'Value' field, or a base64 decode failure.
    """
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not isList(json_data):
        raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        raise UnknownError(
            "more than one key returned by consul! response = '%s'. %s" % (content, support_msg_api())
        )
    try:
        value = json_data[0]["Value"]
    except KeyError:
        raise UnknownError(
            "couldn't find field 'Value' in response from consul: '%s'. %s" % (content, support_msg_api())
        )
    try:
        # base64.decodestring was removed in Python 3.9 - use decodebytes
        # where available, falling back for Python 2.7
        b64decode = getattr(base64, 'decodebytes', None) or base64.decodestring
        value = b64decode(value)
    except TypeError as _:
        raise UnknownError(
            "invalid data returned for key '{0}' value = '{1}', failed to base64 decode".format(self.key, value)
        )
    return value
def extract_value(self, content):  # pylint: disable=no-self-use
    """Extract and base64-decode the single 'Value' field from a Consul KV
    JSON response, quitting UNKNOWN on any validation failure.
    """
    try:
        json_data = json.loads(content)
    except ValueError:
        qquit('UNKNOWN', "non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not isList(json_data):
        qquit('UNKNOWN', "non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        qquit('UNKNOWN', "blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        qquit('UNKNOWN', "more than one key returned by consul! response = '%s'. %s" \
                         % (content, support_msg_api()))
    try:
        value = json_data[0]['Value']
    except KeyError:
        qquit('UNKNOWN', "couldn't find field 'Value' in response from consul: '%s'. %s" \
                         % (content, support_msg_api()))
    try:
        # base64.decodestring was removed in Python 3.9 - use decodebytes
        # where available, falling back for Python 2.7
        b64decode = getattr(base64, 'decodebytes', None) or base64.decodestring
        value = b64decode(value)
    except TypeError:
        # BUG FIX: original formatted with '% locals()' but no local named
        # 'key' exists in this method, which would raise KeyError while
        # building the error message - use an explicit mapping instead
        qquit('UNKNOWN', "invalid data returned for key '%(key)s' value = '%(value)s', failed to base64 decode" \
                         % {'key': self.key, 'value': value})
    return value
def get_stat(node, stat):
    """Return node[stat] validated as a non-negative float, rounded to 2 decimal places."""
    raw = node[stat]
    if not isFloat(raw):
        raise UnknownError('{stat} is not a float! {msg}'.format(stat=stat, msg=support_msg_api()))
    rounded = float('{0:.2f}'.format(raw))
    if rounded < 0:
        raise UnknownError('{stat} < 0 ?!!! {msg}'.format(stat=stat, msg=support_msg_api()))
    return rounded
def get_recent_failure_ratio(node):
    """Return node['recentFailureRatio'] validated as a float in [0, 1], rounded to 2 decimal places."""
    ratio = node['recentFailureRatio']
    if not isFloat(ratio):
        raise UnknownError('recentFailureRatio is not a float! {0}'.format(support_msg_api()))
    ratio = float('{0:.2f}'.format(ratio))
    if ratio < 0:
        raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(support_msg_api()))
    if ratio > 1:
        raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(support_msg_api()))
    return ratio
def check_read(self, table_conn, row, column, expected=None):
    """Read one cell from HBase and validate it against optional regex / exact value.

    Records the read query time in self.timings[column]['read'] (keeping the
    max across calls), stores the value on self.value and returns
    (value, query_time_ms). Quits CRITICAL/UNKNOWN on failures.
    """
    log.info("getting cell for row '%s' column '%s'", row, column)
    start = time.time()
    cells = table_conn.cells(row, column, versions=1)
    query_time = (time.time() - start) * 1000
    log.info('query read in %s ms', query_time)
    cell_info = "HBase table '{0}' row '{1}' column '{2}'".format(self.table, row, column)
    log.debug('cells returned: %s', cells)
    if not isList(cells):
        qquit('UNKNOWN', 'non-list returned for cells. ' + support_msg_api())
    if len(cells) < 1:
        qquit('CRITICAL',
              "no cell value found in {0}, does row / column family combination exist?".format(cell_info))
    elif len(cells) > 1:
        qquit('UNKNOWN', "more than one cell returned! " + support_msg_api())
    value = cells[0]
    log.info('value = %s', value)
    if self.regex:
        log.info("checking cell's value '{0}' against expected regex '{1}'".format(value, self.regex))
        if not re.search(self.regex, value):
            qquit('CRITICAL',
                  "cell value '{0}' (expected regex '{1}') for {2}".format(value, self.regex, cell_info))
    if expected:
        log.info("checking cell's value is exactly expected value '{0}'".format(expected))
        if value != expected:
            qquit('CRITICAL',
                  "cell value '{0}' (expected '{1}') for {2}".format(value, expected, cell_info))
    # keep the worst (slowest) read time seen for this column
    column_timings = self.timings.setdefault(column, {})
    column_timings['read'] = max(column_timings.get('read', 0), query_time)
    self.value = value
    return (value, query_time)
def parse_json(self, json_data):
    """Parse NameNode JMX LiveNodes JSON and report datanode block imbalance %.

    Raises CriticalError when no live datanodes are returned and UnknownError
    on any JSON / field parsing problem.
    """
    log.info('parsing response')
    try:
        live_nodes = json_data['beans'][0]['LiveNodes']
        live_node_data = json.loads(live_nodes)
        num_datanodes = len(live_node_data)
        if num_datanodes < 1:
            raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                .format(self.host, self.port))
        max_blocks = 0
        min_blocks = None
        for (datanode, details) in live_node_data.items():
            blocks = details['numBlocks']
            if not isInt(blocks):
                raise UnknownError('numBlocks is not an integer! {0}'.format(support_msg_api()))
            blocks = int(blocks)
            log.info("datanode '%s' has %s blocks", datanode, blocks)
            max_blocks = max(max_blocks, blocks)
            if min_blocks is None or blocks < min_blocks:
                min_blocks = blocks
        log.info("max blocks on a single datanode = %s", max_blocks)
        log.info("min blocks on a single datanode = %s", min_blocks)
        assert min_blocks is not None
        divisor = min_blocks
        if min_blocks < 1:
            # avoid division by zero; the resulting % is meaningless but large
            log.info("min blocks < 1, resetting divisor to 1 (% will be very high)")
            divisor = 1
        block_imbalance = float("{0:.2f}".format((max_blocks - min_blocks) / divisor * 100))
        self.msg = '{0}% block imbalance across {1} datanode{2}'\
                   .format(block_imbalance, num_datanodes, plural(num_datanodes))
        self.ok()
        self.check_thresholds(block_imbalance)
        if self.verbose:
            self.msg += ' (min blocks = {0}, max blocks = {1})'.format(min_blocks, max_blocks)
        self.msg += " | block_imbalance={0}%".format(block_imbalance)
        self.msg += self.get_perf_thresholds()
        self.msg += " num_datanodes={0}".format(num_datanodes)
        self.msg += " min_blocks={0}".format(min_blocks)
        self.msg += " max_blocks={0}".format(max_blocks)
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                           .format(self.host, self.port, _))
def get_recent_failure_ratio(node):
    """Validate node['recentFailureRatio'] and return it as a float in [0, 1],
    rounded to 2 decimal places.
    """
    recent_failure_ratio = node['recentFailureRatio']
    if not isFloat(recent_failure_ratio):
        raise UnknownError('recentFailureRatio is not a float! {0}'.format(support_msg_api()))
    recent_failure_ratio = float('{0:.2f}'.format(recent_failure_ratio))
    if recent_failure_ratio < 0:
        raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(support_msg_api()))
    if recent_failure_ratio > 1:
        raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(support_msg_api()))
    return recent_failure_ratio
def parse_json(self, json_data):
    """Parse Jenkins job JSON: optionally list jobs, follow the last completed
    build (re-querying recursively), then report build status/duration/age.
    """
    if self.list_jobs:
        print('Jenkins Jobs:\n')
        for job in json_data['jobs']:
            print(job['name'])
        sys.exit(ERRORS['UNKNOWN'])
    if 'lastCompletedBuild' in json_data:
        last_completed_build = json_data['lastCompletedBuild']
        if not last_completed_build:
            raise WarningError("job '{job}' not built yet".format(job=self.job))
        # re-query the specific build and recurse into this method via process_json
        self.path = '/job/{job}/{number}/api/json'.format(job=self.job,
                                                          number=last_completed_build['number'])
        req = self.query()
        self.process_json(req.content)
        return
    display_name = json_data['displayName']
    duration = json_data['duration']
    if not isInt(duration):
        raise UnknownError('duration field returned non-integer! {0}'.format(support_msg_api()))
    duration = int(duration) / 1000
    result = json_data['result']
    timestamp = json_data['timestamp']
    if not isInt(timestamp):
        raise UnknownError('timestamp field returned non-integer! {0}'.format(support_msg_api()))
    timestamp = int(timestamp)
    building = json_data['building']
    self.msg += "build {build} status: ".format(build=display_name)
    if building:
        self.unknown()
        self.msg += 'STILL BUILDING!'
        return
    self.msg += result
    if result != 'SUCCESS':
        self.critical()
    self.msg += ', duration={duration} secs'.format(duration=duration)
    self.check_thresholds(duration)
    age = time.time() - (timestamp / 1000)
    self.msg += ', age={age} secs'.format(age=sec2human(age))
    if age < 0:
        self.warning()
        self.msg += ' (< 0!)'
    if self.age and age > self.age:
        self.critical()
        self.msg += ' (> {0:d})'.format(self.age)
    self.msg += ' | build_duration={duration}s{perf_thresholds}'.format(
        duration=duration,
        perf_thresholds=self.get_perf_thresholds())
def get_peers(content):
    """Parse Consul peers JSON and return the de-duplicated list of peers.

    Raises UnknownError if the content is not JSON, not a list, or blank.
    """
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not isList(json_data):
        raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    for peer in json_data:
        # lazy %-style args so the string is only built when DEBUG is enabled
        log.debug('peer: %s', peer)
    peers = uniq_list(json_data)
    return peers
def get_request_ids(self):
    """Return the list of integer request IDs for self.cluster from the Ambari API.

    Dies with a parse error message if the response is malformed.
    """
    content = self.get('/clusters/{cluster}/requests'.format(cluster=self.cluster))
    try:
        json_data = json.loads(content)
        request_ids = []
        for item in json_data['items']:
            if item['Requests']['cluster_name'] != self.cluster:
                continue
            request_id = item['Requests']['id']
            if not isInt(request_id):
                die('request id returned was not an integer! ' + support_msg_api())
            request_ids.append(request_id)
        return request_ids
    except (KeyError, ValueError) as _:
        die('failed to parse response for request IDs: {0}. '.format(_) + support_msg_api())
def run(self):
    """Fetch etcd server + cluster versions, validate their formats and build self.msg.

    Goes CRITICAL if the server version does not match the expected regex.
    """
    (version, cluster_version) = self.get_version()
    if not isVersion(version):
        qquit('UNKNOWN', '{0} version unrecognized \'{1}\'. {2}'\
                         .format(self.software, version, support_msg_api()))
    if not isVersion(cluster_version):
        qquit('UNKNOWN', '{0} cluster version unrecognized \'{1}\'. {2}'\
                         .format(self.software, cluster_version, support_msg_api()))
    self.msg = '{0} version = {1}'.format(self.software, version)
    if self.expected is not None and not re.search(self.expected, version):
        self.msg += " (expected '{0}')".format(self.expected)
        self.critical()
    self.msg += ', cluster version = {0}'.format(cluster_version)
def parse_consul_json(self, name, content):
    """Parse a Consul API response and return it as a non-empty list.

    `name` labels the payload in error messages. Raises UnknownError for
    non-JSON, blank, or non-list responses.
    """
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json {} data returned by consul at {}:{}: '{}'. {}"\
                           .format(name, self.host, self.port, content, support_msg_api()))
    if not json_data:
        raise UnknownError("blank {} contents returned by consul at {}:{}! '{}'. {}"\
                           .format(name, self.host, self.port, content, support_msg_api()))
    if not isList(json_data):
        raise UnknownError('non-list {} returned by consul at {}:{} for session data. {}'\
                           .format(name, self.host, self.port, support_msg_api()))
    return json_data
def parse_json(self, json_data):
    """Parse Presto node list JSON and report how many worker nodes exceed
    the recent-failure-ratio threshold, plus the max ratio seen.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
    nodes_failing = []
    max_ratio = 0.0
    re_protocol = re.compile('^https?://')
    num_nodes = len(json_data)
    for node_item in json_data:
        ratio = node_item['recentFailureRatio']
        if not isFloat(ratio):
            raise UnknownError('recentFailureRatio is not a float! {0}'.format(support_msg_api()))
        ratio = float('{0:.2f}'.format(ratio))
        if ratio < 0:
            raise UnknownError('recentFailureRatio < 0 ?!!! {0}'.format(support_msg_api()))
        if ratio > 1:
            raise UnknownError('recentFailureRatio > 1 ?!!! {0}'.format(support_msg_api()))
        max_ratio = max(max_ratio, ratio)
        if ratio > self.max_ratio:
            # strip the protocol so the msg lists bare host:port
            nodes_failing.append(re_protocol.sub('', node_item['uri']))
            log.info("node '%s' recent failure ratio %f > max ratio %f",
                     node_item['uri'], ratio, self.max_ratio)
        elif ratio:
            log.info("node '%s' recent failures ratio %f, but less than max ratio threshold of %f",
                     node_item['uri'], ratio, self.max_ratio)
    num_nodes_failing = len(nodes_failing)
    self.msg = 'Presto SQL - worker nodes with recent failure ratio > {0:.2f} = {1:d}'\
               .format(self.max_ratio, num_nodes_failing)
    self.check_thresholds(num_nodes_failing)
    self.msg += ' out of {0:d} nodes'.format(num_nodes)
    if num_nodes < 1:
        self.warning()
        self.msg += ' (< 1 worker found)'
    self.msg += ', max recent failure ratio = {0:.2f}'.format(max_ratio)
    if self.verbose and nodes_failing:
        self.msg += ' [{0}]'.format(','.join(nodes_failing))
    self.msg += ' | num_nodes_failing={0}{1} max_ratio={2:.2f}'\
                .format(num_nodes_failing, self.get_perf_thresholds(), max_ratio)
def get_version(self):
    """Query the Blue Talon API and return (build_version, extra_info).

    Validates that the response actually comes from a Blue Talon server and
    that the API version matches the expected one; quits UNKNOWN otherwise.
    """
    content = self.get()
    try:
        json_list = json.loads(content)
        if log.isEnabledFor(logging.DEBUG):
            print(jsonpp(json_list))
            print('=' * 80)
        if not isList(json_list):
            raise ValueError("non-list returned by API (is type '{0}')".format(type(json_list)))
        json_dict = json_list[0]
        if not isDict(json_dict):
            raise ValueError("non-dict found inside returned list (is type '{0}')".format(type(json_dict)))
        company_name = json_dict['company_name']
        company_website = json_dict['company_website']
        # sanity check we are really talking to a Blue Talon server
        regex = re.compile(r'Blue\s*Talon', re.I)
        if not regex.match(company_name) and \
           not regex.match(company_website):
            qquit('UNKNOWN', 'Blue Talon name was not found in either company_name or company_website fields' \
                             + ', are you definitely querying a Blue Talon server?')
        build_version = json_dict['build_version']
        update_date = json_dict['update_date']
        api_version = json_dict['api_version']
        if not isVersion(api_version):
            qquit('UNKNOWN', '{0} api version unrecognized \'{1}\'. {2}'\
                             .format(self.software, api_version, support_msg_api()))
        if api_version != self.api_version:
            qquit('UNKNOWN', "unexpected API version '{0}' returned (expected '{1}')"\
                             .format(api_version, self.api_version))
        if self.verbose:
            extra_info = ' revision {revision} build {build}, schema revision = {schema_revision}'\
                         .format(revision=json_dict['revision_no'],
                                 build=json_dict['build_no'],
                                 schema_revision=json_dict['schema_revision'])
            extra_info += ', api version = {api_version}, update date = {update_date}'\
                          .format(api_version=api_version, update_date=update_date)
        else:
            extra_info = ', update date = {update_date}'.format(update_date=update_date)
    except (KeyError, ValueError) as _:
        qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
                         .format(software=self.software,
                                 exception=type(_).__name__,
                                 error=_,
                                 support_msg=support_msg_api()))
    return (build_version, extra_info)
def parse_json(self, json_data):
    """Parse NameNode JMX LiveNodes JSON and report the largest HDFS
    used-space imbalance percentage across datanodes.

    Raises CriticalError when no live datanodes are returned and
    UnknownError on JSON / field parsing problems.
    """
    log.info('parsing response')
    try:
        live_nodes = json_data['beans'][0]['LiveNodes']
        live_node_data = json.loads(live_nodes)
        num_datanodes = len(live_node_data)
        if num_datanodes < 1:
            raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                .format(self.host, self.port))
        min_space = None
        max_space = 0
        for datanode in live_node_data:
            used_space = live_node_data[datanode]['usedSpace']
            if not isInt(used_space):
                raise UnknownError('usedSpace is not an integer! {0}'.format(support_msg_api()))
            used_space = int(used_space)
            log.info("datanode '%s' used space = %s", datanode, used_space)
            if min_space is None or used_space < min_space:
                min_space = used_space
            if used_space > max_space:
                max_space = used_space
        divisor = max_space
        if divisor < 1:
            # avoid division by zero on an empty cluster
            log.info('min used space < 1, resetting divisor to 1 (% will likely be very high)')
            divisor = 1
        assert max_space >= min_space
        largest_imbalance_pc = float('{0:.2f}'.format(((max_space - min_space) / divisor) * 100))
        assert largest_imbalance_pc >= 0
        self.ok()
        self.msg = '{0}% HDFS imbalance on space used'.format(largest_imbalance_pc)
        self.check_thresholds(largest_imbalance_pc)
        self.msg += ' across {0:d} datanode{1}'.format(num_datanodes, plural(num_datanodes))
        if self.verbose:
            self.msg += ', min used space = {0}, max used space = {1}'.format(min_space, max_space)
        if self.verbose and (self.is_warning() or self.is_critical()):
            self.msg += ' [imbalanced nodes: '
            for datanode in live_node_data:
                used_space = live_node_data[datanode]['usedSpace']
                used_pc = used_space / max_space * 100
                if used_pc > self.thresholds['warning']['upper']:
                    # BUG FIX: original used format spec '{1:.2f%}' which is
                    # invalid and raised ValueError at runtime; show the
                    # node's used-space percentage with a valid spec
                    self.msg += '{0}({1:.2f}%),'.format(datanode, used_pc)
            self.msg = self.msg.rstrip(',') + ']'
        self.msg += " | 'HDFS imbalance on space used %'={0}".format(largest_imbalance_pc)
        self.msg += self.get_perf_thresholds()
        self.msg += " num_datanodes={0}".format(num_datanodes)
        self.msg += " min_used_space={0}".format(min_space)
        self.msg += " max_used_space={0}".format(max_space)
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
def parse_json(self, json_data):
    """Parse NameNode JMX JSON, validate TotalBlocks and report it with thresholds + perfdata."""
    log.info('parsing response')
    try:
        bean = json_data['beans'][0]
        total_blocks = bean['TotalBlocks']
        if not isInt(total_blocks):
            raise UnknownError('non-integer returned by NameNode for number of total blocks! {0}'\
                               .format(support_msg_api()))
        total_blocks = int(total_blocks)
        self.msg = 'HDFS Total Blocks = {0:d}'.format(total_blocks)
        self.check_thresholds(total_blocks)
        self.msg += ' | hdfs_total_blocks={0:d}{1}'.format(total_blocks, self.get_perf_thresholds())
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
def parse_json(self, json_data):
    """Validate and report the HDFS TotalBlocks count from NameNode JMX JSON."""
    log.info('parsing response')
    try:
        total_blocks = json_data['beans'][0]['TotalBlocks']
        if not isInt(total_blocks):
            raise UnknownError('non-integer returned by NameNode for number of total blocks! {0}'\
                               .format(support_msg_api()))
        total_blocks = int(total_blocks)
        self.msg = 'HDFS Total Blocks = {0:d}'.format(total_blocks)
        self.check_thresholds(total_blocks)
        self.msg += ' | hdfs_total_blocks={0:d}{1}'.format(total_blocks,
                                                           self.get_perf_thresholds())
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
def get_tables(self):
    """Return the list of HBase tables from the Thrift connection.

    Quits UNKNOWN if the API returns a non-list, CRITICAL on connection /
    Thrift / HBase errors.
    """
    try:
        tables = self.conn.tables()
        if not isList(tables):
            qquit('UNKNOWN', 'table list returned is not a list! ' + support_msg_api())
        # BUG FIX: original computed the table list but never returned it,
        # so callers always received None
        return tables
    except (socket.timeout, ThriftException, HBaseIOError) as _:
        qquit('CRITICAL', 'error while trying to get table list: {0}'.format(_))
def print_table_regions(self):
    """Print a formatted table of HBase regions: name, start/end keys and serving server."""
    sep = self.separator

    def _cell(value, width):
        # one fixed-width column followed by the separator, no newline
        print('{0:{1}}{2}'.format(value, width, sep), end='')

    print('=' * self.total_width)
    _cell(self.region_header, self.region_width)
    _cell(self.start_key_header, self.start_key_width)
    _cell(self.end_key_header, self.end_key_width)
    print('{0}'.format(self.server_header))
    print('=' * self.total_width)
    try:
        for region in self._regions:
            _cell(self.bytes_to_str(self.shorten_region_name(region['name'])), self.region_width)
            _cell(self.bytes_to_str(region['start_key']), self.start_key_width)
            _cell(self.bytes_to_str(region['end_key']), self.end_key_width)
            print('{0}:{1}'.format(region['server_name'], region['port']))
    except KeyError as _:
        die('error parsing region info: {0}. '.format(_) + support_msg_api())
    print('\nNumber of Regions: {0:d}'.format(len(self._regions)))
def parse_json(self, json_data):
    """Parse Apache Drill storage plugin JSON: optionally list plugins, then
    check the configured plugin exists, is enabled, and matches --type.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned for storage plugins. {}'.format(support_msg_api()))
    if self.get_opt('list'):
        print('Apache Drill storage plugins:\n')
        print('=' * 50)
        print('%-10s\t%-10s\t%s' % ('Name', 'Type', 'Enabled'))
        print('=' * 50 + '\n')
        for storage_plugin in json_data:
            plugin_config = storage_plugin['config']
            print('%-10s\t%-10s\t%s' % (storage_plugin['name'],
                                        plugin_config['type'],
                                        plugin_config['enabled']))
        sys.exit(ERRORS['UNKNOWN'])
    config = None
    for storage_plugin in json_data:
        if storage_plugin['name'] == self.storage_plugin:
            config = storage_plugin['config']
            plugin_type = config['type']
            enabled = config['enabled']
            break
    if not config:
        raise CriticalError("Apache Drill storage plugin '{}' not found! See --list for available plugins!"\
                            .format(self.storage_plugin))
    self.msg = "Apache Drill storage plugin '{}' enabled = {}, plugin type = '{}'"\
               .format(self.storage_plugin, enabled, plugin_type)
    if not enabled:
        self.critical()
    _type = self.get_opt('type')
    if _type and _type != plugin_type:
        self.critical()
        self.msg += " (expected '{}')".format(_type)
def parse_json(self, json_data):
    """Parse NameNode NameDirStatuses JSON and report failed vs active name dirs.

    WARNING on any failed dir, CRITICAL when no active dirs remain.
    """
    log.info('parsing response')
    try:
        name_dir_statuses = json_data['beans'][0]['NameDirStatuses']
        name_dir_data = json.loads(name_dir_statuses)
        active_dirs = name_dir_data['active']
        failed_dirs = name_dir_data['failed']
        num_active_dirs = len(active_dirs)
        num_failed_dirs = len(failed_dirs)
        self.msg = 'NameNode has {0} failed dir{1}'.format(num_failed_dirs, plural(num_failed_dirs))
        if num_failed_dirs > 0:
            self.warning()
            if self.verbose:
                self.msg += ' ({0})'.format(', '.join(failed_dirs))
        self.msg += ', {0} active dir{1}'.format(num_active_dirs, plural(num_active_dirs))
        if num_active_dirs < 1:
            self.critical()
        if self.verbose and num_active_dirs > 0:
            self.msg += ' ({0})'.format(', '.join(active_dirs))
        self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(num_failed_dirs, num_active_dirs)
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                           .format(self.host, self.port, _))
def check_version(self, version):
    """Validate the detected software version and compare against the
    expected regex (if any), going CRITICAL on mismatch.
    """
    log.info("checking version '%s'", version)
    if not version:
        qquit('UNKNOWN', '{0} version not found. {1}'.format(self.software, support_msg_api()))
    if not isVersion(version):
        qquit('UNKNOWN', '{0} version unrecognized \'{1}\'. {2}'\
                         .format(self.software, version, support_msg_api()))
    self.msg = '{0} version = {1}'.format(self.software, version)
    if self.expected is None:
        return
    log.info("verifying version against expected regex '%s'", self.expected)
    if re.match(self.expected, version):
        log.info('version regex matches retrieved version')
    else:
        log.info('version regex does not match retrieved version')
        self.msg += " (expected '{0}')".format(self.expected)
        self.critical()
def parse_json(self, json_data):
    """Count Presto worker nodes from the JSON node list and report with lower-bound thresholds."""
    if not isList(json_data):
        raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
    num_worker_nodes = len(json_data)
    self.msg = 'Presto SQL worker nodes = {0}'.format(num_worker_nodes)
    self.check_thresholds(num_worker_nodes)
    self.msg += ' | num_worker_nodes={0}{1}'.format(num_worker_nodes,
                                                    self.get_perf_thresholds('lower'))
def run(self):
    """Query the Oozie admin status endpoint; OK when systemMode == NORMAL, else CRITICAL."""
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    validate_host(host)
    validate_port(port)
    url = 'http://%(host)s:%(port)s/oozie/v1/admin/status' % locals()
    req = RequestHandler().get(url)
    # expected response body: {"systemMode": "NORMAL"}
    if not isJson(req.content):
        raise UnknownError('non-JSON returned by Oozie server at {0}:{1}'.format(host, port))
    try:
        status = json.loads(req.content)['systemMode']
    except KeyError:
        raise UnknownError('systemMode key was not returned in output from Oozie. {0}'.format(support_msg_api()))
    self.msg = 'Oozie status = {0}'.format(status)
    if status == 'NORMAL':
        self.ok()
    else:
        self.critical()
def parse_json(self, json_data):
    """Parse Atlas entity JSON.

    Handles three modes: --list (print all entities and exit UNKNOWN),
    lookup by entity id (expects exactly one match), and lookup by entity
    name (resolves the name to an id, then re-queries by id, recursing back
    into this method via process_json). Once a single entity is selected,
    validates its state, type and tags into self.msg.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Atlas metadata server instance at {0}:{1}! {2}'\
                           .format(self.host, self.port, support_msg_api()))
    if len(json_data) < 1:
        raise CriticalError('no entities found!')
    if self.list_entities:
        print('=' * 100)
        print('{0:40} {1:25} {2}'.format('ID', 'Type', 'Name'))
        print('=' * 100)
        for entity in json_data:
            name = self.get_key(entity, 'name')
            _id = self.get_key(entity, 'id')
            _type = self.get_key(entity, 'type')
            print('{0:40} {1:25} {2}'.format(_id, _type, name))
        sys.exit(ERRORS['UNKNOWN'])
    if self.entity_id:
        if len(json_data) > 1:
            raise CriticalError('more than one matching entity returned!')
        json_data = json_data[0]
    elif self.entity_name:
        for entity in json_data:
            if self.entity_name == self.get_key(entity, 'name'):
                # Recursion - a bit too clever but convenient
                # switch to id-based lookup and re-query; process_json
                # re-enters this method with the single-entity response
                self.entity_name = None
                self.entity_id = self.get_key(entity, 'id')
                self.path += '/{0}'.format(self.entity_id)
                req = self.query()
                self.process_json(req.content)
                # escape recursion
                return
        raise CriticalError("entity with name '{name}' not found!".format(name=self.entity_name))
    name = self.get_key(json_data, 'name')
    state = self.get_key(json_data, 'state')
    # available for HDFS path but not DB
    #path = self.get_key(json_data, 'path')
    _type = self.get_key(json_data, 'type')
    tags = []
    if 'trait_names' in json_data:
        tags = self.get_key(json_data, 'trait_names')
    #traits = self.get_key(json_data, 'traits')
    version = self.get_key(json_data, 'version')
    modified_date = self.get_key(json_data, 'modified_time')
    self.msg = " '{name}' exists, state='{state}'".format(name=name, state=state)
    if state != 'ACTIVE':
        self.critical()
        self.msg += " (expected 'ACTIVE')"
    self.msg += ", type='{type}'".format(type=_type)
    self.check_type(_type)
    #if self.verbose:
    self.msg += ", tags='{tags}'".format(tags=','.join(tags))
    self.check_missing_tags(tags)
    #if self.verbose:
    #self.msg += ", traits='{traits}'".format(traits=','.join(traits))
    #self.check_missing_traits(traits)
    if self.verbose:
        self.msg += ", modified_date='{modified_date}', version='{version}'".format(
            modified_date=modified_date,
            version=version
        )
def get_tables(self):
    """Return the list of HBase tables from the Thrift connection.

    Quits UNKNOWN if the API returns a non-list, CRITICAL on socket /
    Thrift / HBase errors.
    """
    try:
        tables = self.conn.tables()
        if not isList(tables):
            qquit('UNKNOWN', 'table list returned is not a list! ' + support_msg_api())
        # BUG FIX: original computed the table list but never returned it,
        # so callers always received None
        return tables
    except (socket.error, socket.timeout, ThriftException, HBaseIOError) as _:
        qquit('CRITICAL', 'error while trying to get table list: {0}'.format(_))
def parse_json(self, json_data):
    """Parse NameNode JMX JSON and report HDFS % space used plus file/block
    counts and perfdata.
    """
    log.info('parsing response')
    try:
        bean = json_data['beans'][0]
        space_used_pc = bean['PercentUsed']
        # the way below is more informative
        #assert type(space_used_pc) == float
        # scientific notation like 1.2e-05 is effectively zero usage
        if re.search(r'e-\d+$', str(space_used_pc)):
            space_used_pc = 0
        if not isFloat(space_used_pc):
            raise UnknownError("non-float returned for PercentUsed by namenode '{0}:{1}'"\
                               .format(self.host, self.port))
        assert space_used_pc >= 0
        stats = {}
        for stat in ('Total', 'TotalBlocks', 'TotalFiles', 'Used'):
            stats[stat] = bean[stat]
            if not isInt(stats[stat]):
                raise UnknownError("non-integer returned for {0} by namenode '{1}:{2}'"\
                                   .format(stat, self.host, self.port))
            stats[stat] = int(stats[stat])
        self.ok()
        self.msg = 'HDFS space used = {0:.2f}% ({1}/{2})'\
                   .format(space_used_pc,
                           humanize.naturalsize(stats['Used']),
                           humanize.naturalsize(stats['Total']))
        self.check_thresholds(space_used_pc)
        self.msg += ", in {0:d} files spread across {1:d} blocks".format(stats['TotalFiles'],
                                                                         stats['TotalBlocks'])
        self.msg += " | 'HDFS % space used'={0:f}%{1}".format(space_used_pc, self.get_perf_thresholds())
        self.msg += " 'HDFS space used'={0:d}b".format(stats['Used'])
        self.msg += " 'HDFS file count'={0:d}".format(stats['TotalFiles'])
        self.msg += " 'HDFS block count'={0:d}".format(stats['TotalBlocks'])
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                           .format(self.host, self.port, _))
def parse_json(self, json_data):
    """Parse Logstash node API JSON and check the configured pipeline exists,
    reporting workers, dead letter queue setting and batch parameters.

    NOTE: flattened source is ambiguous here; list/lookup handling is placed
    inside the non-logstash-5 branch where 'pipelines' is defined.
    """
    if self.get_opt('logstash_5'):
        # Logstash 5.x exposes a single unnamed pipeline
        pipeline = json_data['pipeline']
    else:
        pipelines = json_data['pipelines']
        if self.get_opt('list'):
            print('Logstash Pipelines:\n')
            for pipeline in pipelines:
                print(pipeline)
            sys.exit(ERRORS['UNKNOWN'])
        pipeline = None
        if self.pipeline in pipelines:
            pipeline = pipelines[self.pipeline]
    self.msg = "Logstash pipeline '{}' ".format(self.pipeline)
    if pipeline:
        self.msg += 'exists'
        if 'workers' not in pipeline:
            raise UnknownError('workers field not found, Logstash may still be initializing' + \
                               '. If problem persists {}'.format(support_msg_api()))
        workers = pipeline['workers']
        self.msg += ' with {} workers'.format(workers)
        self.check_thresholds(workers)
        if not self.get_opt('logstash_5'):
            dead_letter_queue_enabled = pipeline['dead_letter_queue_enabled']
            self.msg += ', dead letter queue enabled: {}'.format(dead_letter_queue_enabled)
            if self.get_opt('dead_letter_queue_enabled') and not dead_letter_queue_enabled:
                self.warning()
                self.msg += ' (expected True)'
        batch_delay = pipeline['batch_delay']
        batch_size = pipeline['batch_size']
        self.msg += ', batch delay: {}, batch size: {}'.format(batch_delay, batch_size)
    else:
        self.critical()
        self.msg += 'does not exist!'
def get_version(self):
    """Return the Solr version from the admin system info endpoint.

    Solr 7.0+ returns JSON; older versions return XML which is scraped with
    BeautifulSoup. Quits CRITICAL on HTTP errors, UNKNOWN on parse failure.
    """
    url = 'http://{host}:{port}/solr/admin/info/system'.format(host=self.host, port=self.port)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug('response: %s %s', req.status_code, req.reason)
    log.debug('content:\n%s\n%s\n%s', '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        qquit('CRITICAL', '%s %s' % (req.status_code, req.reason))
    # versions 7.0+
    if isJson(req.content):
        version = json.loads(req.content)['lucene']['solr-spec-version']
    else:
        soup = BeautifulSoup(req.content, 'html.parser')
        if log.isEnabledFor(logging.DEBUG):
            log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '=' * 80))
        try:
            version = soup.find('str', {'name': 'solr-spec-version'}).text
        except (AttributeError, TypeError) as _:
            qquit('UNKNOWN', 'failed to find parse Solr output. {0}\n{1}'\
                             .format(support_msg_api(), traceback.format_exc()))
    return version
def parse_scheduled_request(content):
    """Parse an Ambari scheduled request status response.

    Returns 'COMPLETED' when the schedule finished, 'IN_PROGRESS' when any
    non-completed, non-aborted batch request remains, otherwise None.
    Logs each incomplete batch request's status, command and context.
    """
    try:
        json_data = json.loads(content)
        if json_data['RequestSchedule']['last_execution_status'] == 'COMPLETED':
            log.info('COMPLETED')
            return 'COMPLETED'
        for item in json_data['RequestSchedule']['batch']['batch_requests']:
            request_status = item.get('request_status', 'NO STATUS YET')
            if request_status == 'COMPLETED':
                continue
            request_body_dict = json.loads(item['request_body'])
            command = request_body_dict['RequestInfo']['command']
            context = request_body_dict['RequestInfo']['context']
            log.info('{request_status}: {command}: {context}'.format(request_status=request_status,
                                                                     command=command,
                                                                     context=context))
            if request_status != 'ABORTED':
                return 'IN_PROGRESS'
    except (KeyError, ValueError) as _:
        die('parsing schedule request status failed: ' + str(_) + '. ' + support_msg_api())
def parse_json(self, json_data):
    """Find a single Apache Drill config setting and compare it against the
    expected value regex, setting OK or CRITICAL accordingly.

    With --list, prints every name = value pair and exits UNKNOWN.
    Raises UnknownError for a non-list response or an unknown config key.
    """
    if not isList(json_data):
        raise UnknownError(
            'non-list returned for config settings. {}'.format(
                support_msg_api()))
    if self.list_config:
        print('Apache Drill config settings:\n')
        for setting in json_data:
            print('{} = {}'.format(setting['name'], setting['value']))
        sys.exit(ERRORS['UNKNOWN'])
    name = None
    value = None
    for setting in json_data:
        if setting['name'] == self.config_key:
            # carry the name found in the payload (rather than self.config_key)
            # so any key mismatch is visible in the output message
            name = setting['name']
            value = setting['value']
            break
    if value is None:
        raise UnknownError(
            "config key '{}' not found. See --list for all config keys".
            format(self.config_key))
    self.msg = "Apache Drill config '{}' = '{}'".format(name, value)
    if re.match(str(self.expected_value), str(value), re.I):
        self.ok()
    else:
        self.critical()
        self.msg += " (expected '{}')".format(self.expected_value)
def get_version(self):
    """Scrape the software version from the server's 404 error page.

    Nginx-style servers return 404 for /version; the version string is in
    the second <center> element of the error page, optionally as
    'name/version' of which only the part after the first '/' is returned.
    Exits via qquit() on connection errors or an unexpected status code.
    """
    log.info('querying %s', self.software)
    url = 'http://{host}:{port}/version'.format(host=self.host, port=self.port)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    # Special handling for Nginx, expecting 404 rather than usual 200
    if req.status_code != 404:
        qquit('CRITICAL', '{0} {1} (expecting 404)'.format(req.status_code, req.reason))
    soup = BeautifulSoup(req.content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '=' * 80))
    try:
        raw_version = soup.findAll('center')[1].text
    except (AttributeError, TypeError) as _:
        qquit('UNKNOWN', 'failed to find parse {0} output. {1}\n{2}'\
              .format(self.software, support_msg_api(), traceback.format_exc()))
    if '/' not in raw_version:
        return raw_version
    return raw_version.split('/')[1]
def get_version(self):
    """Return the Solr version string from the admin system info endpoint.

    Solr 7.0+ responds with JSON; older releases respond with XML which is
    scraped via BeautifulSoup. Exits via qquit() on connection errors,
    non-200 responses, or unparseable output.
    """
    url = 'http://{host}:{port}/solr/admin/info/system'.format(
        host=self.host, port=self.port)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug('response: %s %s', req.status_code, req.reason)
    log.debug('content:\n%s\n%s\n%s', '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        qquit('CRITICAL', '%s %s' % (req.status_code, req.reason))
    # versions 7.0+ return JSON instead of the older XML format
    if isJson(req.content):
        json_data = json.loads(req.content)
        version = json_data['lucene']['solr-spec-version']
    else:
        soup = BeautifulSoup(req.content, 'html.parser')
        if log.isEnabledFor(logging.DEBUG):
            log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(
                soup.prettify(), '=' * 80))
        try:
            # version lives in <str name="solr-spec-version">
            version = soup.find('str', {'name': 'solr-spec-version'}).text
        except (AttributeError, TypeError) as _:
            qquit('UNKNOWN', 'failed to find parse Solr output. {0}\n{1}'\
                  .format(support_msg_api(), traceback.format_exc()))
    return version
def run(self):
    """Query the Attivio AIE ingest session count endpoint and check it
    against warning/critical thresholds, appending perfdata.

    Exits via qquit() on connection errors, non-200 responses, or a
    non-integer body.
    """
    url = '{protocol}://{host}:{port}/rest/ingestApi/getSessionCount'.format(host=self.host,
                                                                            port=self.port,
                                                                            protocol=self.protocol)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        errhint = ''
        # use str(_) rather than _.message: exceptions have no .message
        # attribute on Python 3 and the substring test is equivalent
        if 'BadStatusLine' in str(_):
            errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
        elif self.protocol == 'https' and 'unknown protocol' in str(_):
            errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
        qquit('CRITICAL', str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
    if req.status_code != 200:
        qquit('CRITICAL', '{0} {1}'.format(req.status_code, req.reason))
    try:
        count = req.content.strip()
        if not isInt(count):
            raise ValueError('non-integer value returned by Attivio AIE')
        count = int(count)
        self.msg = '{software} ingest session count = {count}'.format(software=self.software, count=count)
        self.check_thresholds(count)
    # bug fix: was 'except (KeyError, ValueError):' which did not bind _,
    # so the handler below raised NameError (or used a stale _) instead of
    # reporting the parse error
    except (KeyError, ValueError) as _:
        qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
                         .format(software=self.software,
                                 exception=type(_).__name__,
                                 error=_,
                                 support_msg=support_msg_api()))
    self.msg += ' | ingest_session_count={0:d}{thresholds}'.format(count, thresholds=self.get_perf_thresholds())
def parse_json(self, json_data):
    """Check Presto worker nodes' lastResponseTime age against self.max_age.

    Counts nodes whose last response is older than max_age seconds, applies
    thresholds to that count, and appends the max observed lag plus
    perfdata to self.msg.
    """
    if not isList(json_data):
        raise UnknownError(
            'non-list returned by Presto for nodes. {0}'.format(
                support_msg_api()))
    nodes_lagging = []
    max_lag = 0
    # used to strip http(s):// so node URIs print compactly
    re_protocol = re.compile('^https?://')
    for node_item in json_data:
        # timestamps are ISO8601 UTC with milliseconds, eg. 2018-01-01T00:00:00.000Z
        last_response_time = node_item['lastResponseTime']
        last_response_datetime = datetime.strptime(
            last_response_time, '%Y-%m-%dT%H:%M:%S.%fZ')
        timedelta = datetime.utcnow() - last_response_datetime
        response_age = int(timedelta.total_seconds())
        if response_age > max_lag:
            max_lag = response_age
        if response_age > self.max_age:
            uri = node_item['uri']
            uri = re_protocol.sub('', uri)
            nodes_lagging += [uri]
            log.info(
                "node '%s' last response age %d secs > max age %s secs",
                node_item['uri'], response_age, self.max_age)
        else:
            log.info("node '%s' last response age %d secs",
                     node_item['uri'], response_age)
    num_nodes_lagging = len(nodes_lagging)
    self.msg = 'Presto SQL worker nodes with response timestamps older than {0:d} secs = {1:d}'\
               .format(self.max_age, num_nodes_lagging)
    self.check_thresholds(num_nodes_lagging)
    self.msg += ', current max response age = {0:.2f} secs'.format(max_lag)
    # only list the lagging node URIs in verbose mode
    if self.verbose and nodes_lagging:
        self.msg += ' [{0}]'.format(', '.join(nodes_lagging))
    self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                .format(num_nodes_lagging, self.get_perf_thresholds(), max_lag)
def check_version(self, version):
    """Validate the retrieved version string and compare it against the
    optional expected regex (self.expected), going CRITICAL on mismatch.

    Exits via qquit() if the version is blank or not a recognized version
    format.
    """
    log.info("checking version '%s'", version)
    if not version:
        qquit('UNKNOWN', '{0} version not found. {1}'.format(self.software, support_msg_api()))
    if not isVersion(version):
        qquit('UNKNOWN', '{0} version unrecognized \'{1}\'. {2}'\
              .format(self.software, version, support_msg_api()))
    self.msg = '{0} version = {1}'.format(self.software, version)
    if self.expected is None:
        return
    log.info("verifying version against expected regex '%s'", self.expected)
    if re.match(self.expected, str(version)):
        log.info('version regex matches retrieved version')
        return
    log.info('version regex does not match retrieved version')
    self.msg += " (expected '{0}')".format(self.expected)
    self.critical()
def parse_json(self, json_data):
    """Count total and available ('UP') Selenium Hub nodes, optionally
    restricted to nodes advertising a slot for self.browser, then check
    thresholds on the available count and emit perfdata.

    Raises UnknownError if the nodes field is missing (old Hub/Selenoid)
    or not a list.
    """
    data = json_data['value']
    try:
        nodes = data['nodes']
    except KeyError:
        raise UnknownError('nodes field not found, are you trying to run this on an old ' +
                           'Selenium Hub <= 3.x or Selenoid? That information is not available in those APIs')
    if not isList(nodes):
        raise UnknownError('nodes field is not a list as expected. {}'.format(support_msg_api()))
    total_nodes = 0
    available_nodes = 0
    wanted_browser = self.browser.lower() if self.browser else None
    for node in nodes:
        # skip nodes with no slot for the requested browser
        if wanted_browser is not None and \
           not any(slot['stereotype']['browserName'].lower() == wanted_browser
                   for slot in node['slots']):
            continue
        total_nodes += 1
        if node['availability'] == 'UP':
            available_nodes += 1
    self.ok()
    self.msg = 'Selenium Hub '
    if self.browser:
        self.msg += "'{}' ".format(self.browser)
    self.msg += 'nodes available = {}/{}'.format(available_nodes, total_nodes)
    self.check_thresholds(available_nodes)
    self.msg += ' | nodes_available={}{} nodes_total={}'\
                .format(available_nodes, self.get_perf_thresholds(boundary='lower'), total_nodes)
def run(self):
    """Query the Attivio AIE ingest session count endpoint and check it
    against warning/critical thresholds, appending perfdata.

    Exits via qquit() on connection errors, non-200 responses, or a
    non-integer body.
    """
    url = '{protocol}://{host}:{port}/rest/ingestApi/getSessionCount'.format(
        host=self.host, port=self.port, protocol=self.protocol)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        errhint = ''
        # use str(_) rather than _.message: exceptions have no .message
        # attribute on Python 3 and the substring test is equivalent
        if 'BadStatusLine' in str(_):
            errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
        elif self.protocol == 'https' and 'unknown protocol' in str(_):
            errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
        qquit('CRITICAL', str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        qquit('CRITICAL', '{0} {1}'.format(req.status_code, req.reason))
    try:
        count = req.content.strip()
        if not isInt(count):
            raise ValueError('non-integer value returned by Attivio AIE')
        count = int(count)
        self.msg = '{software} ingest session count = {count}'.format(
            software=self.software, count=count)
        self.check_thresholds(count)
    # bug fix: was 'except (KeyError, ValueError):' which did not bind _,
    # so the handler below raised NameError (or used a stale _) instead of
    # reporting the parse error
    except (KeyError, ValueError) as _:
        qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
                         .format(software=self.software, exception=type(_).__name__, error=_,
                                 support_msg=support_msg_api()))
    self.msg += ' | ingest_session_count={0:d}{thresholds}'.format(
        count, thresholds=self.get_perf_thresholds())
def parse_json(self, json_data):
    """Parse the NameNode JMX NameDirStatuses bean: count active vs failed
    name dirs, WARNING on any failed dir, CRITICAL if no active dirs, and
    append perfdata counts to self.msg.

    Raises UnknownError on missing fields or invalid embedded JSON.
    """
    log.info('parsing response')
    try:
        data = json_data['beans'][0]
        # NameDirStatuses is itself a JSON-encoded string inside the bean
        name_dir_statuses = data['NameDirStatuses']
        name_dir_data = json.loads(name_dir_statuses)
        active_dirs = name_dir_data['active']
        failed_dirs = name_dir_data['failed']
        num_active_dirs = len(active_dirs)
        num_failed_dirs = len(failed_dirs)
        self.msg = 'NameNode has {0} failed dir{1}'.format(
            num_failed_dirs, plural(num_failed_dirs))
        if num_failed_dirs > 0:
            self.warning()
            if self.verbose:
                self.msg += ' ({0})'.format(', '.join(failed_dirs))
        self.msg += ', {0} active dir{1}'.format(num_active_dirs,
                                                 plural(num_active_dirs))
        if num_active_dirs < 1:
            self.critical()
        if self.verbose and num_active_dirs > 0:
            self.msg += ' ({0})'.format(', '.join(active_dirs))
        self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(
            num_failed_dirs, num_active_dirs)
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                           .format(self.host, self.port, _))
def parse_json(self, json_data):
    """Parse Atlas entity JSON: list entities, look one up by id or name,
    then validate its state, type and tags.

    Name lookup resolves the id and re-queries by id (recursion via
    self.query()/self.process_json()). Goes CRITICAL if the entity state
    is not ACTIVE, and delegates further checks to check_type() and
    check_missing_tags().
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Atlas metadata server instance at {0}:{1}! {2}'\
                           .format(self.host, self.port, support_msg_api()))
    if len(json_data) < 1:
        raise CriticalError('no entities found!')
    if self.list_entities:
        print('=' * 100)
        print('{0:40} {1:25} {2}'.format('ID', 'Type', 'Name'))
        print('=' * 100)
        for entity in json_data:
            name = self.get_key(entity, 'name')
            _id = self.get_key(entity, 'id')
            _type = self.get_key(entity, 'type')
            print('{0:40} {1:25} {2}'.format(_id, _type, name))
        sys.exit(ERRORS['UNKNOWN'])
    if self.entity_id:
        if len(json_data) > 1:
            raise CriticalError('more than one matching entity returned!')
        json_data = json_data[0]
    elif self.entity_name:
        for entity in json_data:
            if self.entity_name == self.get_key(entity, 'name'):
                # Recursion - a bit too clever but convenient
                self.entity_name = None
                self.entity_id = self.get_key(entity, 'id')
                self.path += '/{0}'.format(self.entity_id)
                req = self.query()
                self.process_json(req.content)
                # escape recursion
                return
        raise CriticalError("entity with name '{name}' not found!".format(name=self.entity_name))
    name = self.get_key(json_data, 'name')
    state = self.get_key(json_data, 'state')
    # available for HDFS path but not DB
    #path = self.get_key(json_data, 'path')
    _type = self.get_key(json_data, 'type')
    tags = []
    if 'trait_names' in json_data:
        tags = self.get_key(json_data, 'trait_names')
    #traits = self.get_key(json_data, 'traits')
    version = self.get_key(json_data, 'version')
    modified_date = self.get_key(json_data, 'modified_time')
    self.msg += " '{name}' exists, state='{state}'".format(name=name, state=state)
    if state != 'ACTIVE':
        self.critical()
        self.msg += " (expected 'ACTIVE')"
    self.msg += ", type='{type}'".format(type=_type)
    self.check_type(_type)
    #if self.verbose:
    self.msg += ", tags='{tags}'".format(tags=','.join(tags))
    self.check_missing_tags(tags)
    #if self.verbose:
    #self.msg += ", traits='{traits}'".format(traits=','.join(traits))
    #self.check_missing_traits(traits)
    if self.verbose:
        self.msg += ", modified_date='{modified_date}', version='{version}'".format(
            modified_date=modified_date,
            version=version
        )
def get_key(self, json_data, key):
    """Return json_data[key], converting a missing key into an UnknownError
    so the plugin reports UNKNOWN instead of tracebacking."""
    try:
        return json_data[key]
    except KeyError:
        errmsg = '\'{0}\' key was not returned in output from '.format(key)
        errmsg += 'HiveServer2 Interactive instance at {0}:{1}. {2}'\
                  .format(self.host, self.port, support_msg_api())
        raise UnknownError(errmsg)
def get_key(self, json_data, key):
    """Return json_data[key], converting a missing key into an UnknownError
    so the plugin reports UNKNOWN instead of tracebacking."""
    try:
        return json_data[key]
    except KeyError:
        errmsg = '\'{0}\' key was not returned in output from '.format(key)
        errmsg += 'Atlas metadata server instance at {0}:{1}. {2}'\
                  .format(self.host, self.port, support_msg_api())
        raise UnknownError(errmsg)
def run(self):
    """Fetch recent builds for self.repo from the Travis CI API and hand the
    body to parse_results().

    Raises CriticalError on connection errors or non-200 responses; exits
    UNKNOWN via qquit() if the expected JSON structure cannot be parsed.
    """
    url = 'https://api.travis-ci.org/repos/{repo}/builds'.format(repo=self.repo)
    # consistency/idiom fix: lazy %-style logging args like every other
    # method in this file, instead of eager 'GET %s' % url formatting
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        raise CriticalError(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        raise CriticalError("%s %s" % (req.status_code, req.reason))
    if log.isEnabledFor(logging.DEBUG):
        log.debug("\n{0}".format(jsonpp(req.content)))
    try:
        self.parse_results(req.content)
    except (KeyError, ValueError) as _:
        # last meaningful line of the traceback for a concise error message
        exception = traceback.format_exc().split('\n')[-2]
        # this covers up the traceback info and makes it harder to debug
        #raise UnknownError('failed to parse expected json response from Travis CI API: {0}'.format(exception))
        qquit(
            'UNKNOWN',
            'failed to parse expected json response from Travis CI API: {0}. {1}'
            .format(exception, support_msg_api()))
def parse_json(self, json_data):
    """Check Presto worker nodes' lastResponseTime age against self.max_age.

    Like the sibling variant above but also reports the total node count
    and goes WARNING when fewer than one worker is found.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
    nodes_lagging = []
    max_lag = 0
    # used to strip http(s):// so node URIs print compactly
    re_protocol = re.compile('^https?://')
    num_nodes = len(json_data)
    for node_item in json_data:
        # timestamps are ISO8601 UTC with milliseconds, eg. 2018-01-01T00:00:00.000Z
        last_response_time = node_item['lastResponseTime']
        last_response_datetime = datetime.strptime(last_response_time, '%Y-%m-%dT%H:%M:%S.%fZ')
        timedelta = datetime.utcnow() - last_response_datetime
        response_age = int(timedelta.total_seconds())
        if response_age > max_lag:
            max_lag = response_age
        if response_age > self.max_age:
            uri = node_item['uri']
            uri = re_protocol.sub('', uri)
            nodes_lagging += [uri]
            log.info("node '%s' last response age %d secs > max age %s secs",
                     node_item['uri'], response_age, self.max_age)
        else:
            log.info("node '%s' last response age %d secs", node_item['uri'], response_age)
    num_nodes_lagging = len(nodes_lagging)
    self.msg = 'Presto SQL - worker nodes with response timestamps older than {0:d} secs = {1:d}'\
               .format(self.max_age, num_nodes_lagging)
    self.check_thresholds(num_nodes_lagging)
    self.msg += ' out of {0:d} nodes'.format(num_nodes)
    if num_nodes < 1:
        self.warning()
        self.msg += ' (< 1 worker found)'
    self.msg += ', current max response age = {0:.2f} secs'.format(max_lag)
    # only list the lagging node URIs in verbose mode
    if self.verbose and nodes_lagging:
        self.msg += ' [{0}]'.format(', '.join(nodes_lagging))
    self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                .format(num_nodes_lagging, self.get_perf_thresholds(), max_lag)
def print_results(self, term, limit=None):
    """Search DockerHub for term and print results ordered by star count,
    mimicking the output of the 'docker search' command.

    In quiet mode prints only repo names; in verbose mode appends
    shown/total result counts. Dies via die() on unexpected response
    structure; silently ignores broken pipes (e.g. piping into head).
    """
    data = self.search(term, limit)
    results = {}
    longest_name = 8
    try:
        # collect in dict to order by stars like normal docker search command
        for item in data['results']:
            star = item['star_count']
            name = item['name']
            if len(name) > longest_name:
                longest_name = len(name)
            if not isInt(star):
                die("star count '{0}' for repo '{1}' is not an integer! {2}"
                    .format(star, name, support_msg_api()))
            # nested dict: star count -> repo name -> details
            results[star] = results.get(star, {})
            results[star][name] = results[star].get(name, {})
            result = {}
            result['description'] = item['description']
            result['official'] = '[OK]' if item['is_official'] else ''
            # docker search doesn't output this so neither will I
            #result['trusted'] = result['is_trusted']
            result['automated'] = '[OK]' if item['is_automated'] else ''
            results[star][name] = result
        # mimicking out spacing from 'docker search' command
        if not self.quiet:
            print('{0:{5}s} {1:45s} {2:7s} {3:8s} {4:10s}'.format(
                'NAME', 'DESCRIPTION', 'STARS', 'OFFICIAL', 'AUTOMATED', longest_name))
    except KeyError as _:
        die('failed to parse results fields from data returned by DockerHub ' +
            '(format may have changed?): {0}'.format(_))
    except IOError as _:
        # tolerate the downstream pipe closing early (eg. | head)
        if str(_) == '[Errno 32] Broken pipe':
            pass
        else:
            raise

    def truncate(mystr, length):
        # truncate long descriptions with an ellipsis like docker search does
        if len(mystr) > length:
            mystr = mystr[0:length - 3] + '...'
        return mystr

    # highest star counts first, names alphabetical within each star count
    for star in reversed(sorted(results)):
        for name in sorted(results[star]):
            if self.quiet:
                # NOTE(review): .encode('utf-8') prints a bytes repr on
                # Python 3 - this looks written for Python 2; confirm
                print(name.encode('utf-8'))
            else:
                desc = truncate(results[star][name]['description'], 45)
                print('{0:{5}s} {1:45s} {2:<7d} {3:8s} {4:10s}'.
                      format(name.encode('utf-8'), desc.encode('utf-8'), star,
                             results[star][name]['official'],
                             results[star][name]['automated'], longest_name))
    if self.verbose and not self.quiet:
        try:
            print('\nResults Shown: {0}\nTotal Results: {1}'.format(
                len(data['results']), data['num_results']))
        except KeyError as _:
            die('failed to parse get total results count from data returned by DockerHub ' +
                '(format may have changed?): {0}'.format(_))
def parse_builds(self, content):
    """Parse the Travis CI builds API response and collect up to self.num
    builds of interest.

    Selection depends on flags: self.completed collects finished builds of
    any outcome; self.failed collects failed/errored builds (warning once
    if a newer passing build exists); otherwise the most recent builds are
    collected. Build numbers are sanity-checked to be strictly descending.

    Exits UNKNOWN via qquit() when the repo has no builds or nothing
    matched; raises UnknownError on malformed build numbers.
    """
    log.debug('parsing build info')
    build = None
    collected_builds = []
    json_data = json.loads(content)
    if not json_data or \
       'builds' not in json_data or \
       not json_data['builds']:
        qquit(
            'UNKNOWN',
            "no Travis CI builds returned by the Travis API." +
            " Either the specified repo '{0}' doesn't exist".format(self.repo) +
            " or no builds have happened yet?" +
            " Also remember the repo is case sensitive, for example 'harisekhon/nagios-plugins' returns this" +
            " blank build set whereas 'HariSekhon/nagios-plugins' succeeds" +
            " in returning latest builds information")
    builds = json_data['builds']
    # get latest finished failed build
    last_build_number = None
    found_newer_passing_build = False
    for _ in builds:
        # API returns most recent build first
        # extra check to make sure we're getting the very latest build number and API hasn't changed
        build_number = _['number']
        if not isInt(build_number):
            raise UnknownError('build number returned is not an integer!')
        build_number = int(build_number)
        if last_build_number is None:
            last_build_number = int(build_number) + 1
        if build_number >= last_build_number:
            raise UnknownError('build number returned is out of sequence, cannot be >= last build returned' + \
                               '{0}'.format(support_msg_api()))
        last_build_number = build_number
        if self.completed:
            if len(collected_builds) < self.num and _['state'] in ('passed', 'finished', 'failed', 'errored'):
                collected_builds.append(_)
        elif self.failed:
            if _['state'] == 'passed':
                if not collected_builds and not found_newer_passing_build:
                    log.warning("found more recent successful build #%s with state = '%s'" + \
                                ", you may not need to debug this build any more",
                                _['number'], _['state'])
                    found_newer_passing_build = True
            elif _['state'] in ('failed', 'errored'):
                if len(collected_builds) < self.num:
                    collected_builds.append(_)
                    # by continuing to iterate through the rest of the builds we can check
                    # their last_build numbers are descending for extra sanity checking
                    #break
        elif len(collected_builds) < self.num:
            collected_builds.append(_)
            # by continuing to iterate through the rest of the builds we can check
            # their last_build numbers are descending for extra sanity checking
            #break
    if not collected_builds:
        qquit('UNKNOWN', 'no recent builds found')
    if log.isEnabledFor(logging.DEBUG):
        for build in collected_builds:
            log.debug("build:\n%s", jsonpp(build))
    return collected_builds
def print_table_regions(self):
    """Print a fixed-width table of this HBase table's regions: shortened
    region name, start/end keys and hosting server:port, followed by the
    region count. Column widths and headers come from instance attributes.

    Dies via die() if a region record is missing an expected field.
    """
    print('=' * self.total_width)
    # header row, built column by column with end='' to stay on one line
    print('{0:{1}}{2}'.format(self.region_header, self.region_width, self.separator), end='')
    print('{0:{1}}{2}'.format(self.start_key_header, self.start_key_width, self.separator), end='')
    print('{0:{1}}{2}'.format(self.end_key_header, self.end_key_width, self.separator), end='')
    print('{0}'.format(self.server_header))
    print('=' * self.total_width)
    try:
        for region in self._regions:
            print('{0:{1}}{2}'.format(self.bytes_to_str(self.shorten_region_name(region['name'])),
                                      self.region_width, self.separator), end='')
            print('{0:{1}}{2}'.format(self.bytes_to_str(region['start_key']),
                                      self.start_key_width, self.separator), end='')
            print('{0:{1}}{2}'.format(self.bytes_to_str(region['end_key']),
                                      self.end_key_width, self.separator), end='')
            print('{0}:{1}'.format(region['server_name'], region['port']))
    except KeyError as _:
        die('error parsing region info: {0}. '.format(_) + support_msg_api())
    print('\nNumber of Regions: {0:d}'.format(len(self._regions)))
def check_table_regions(self):
    """Fetch regions for self.table over the HBase Thrift API, check the
    region count against thresholds, and WARN if any region is not
    assigned to a server. Emits perfdata for both counts.

    Exits CRITICAL via qquit() if the table doesn't exist, the connection
    fails, or no regions are returned.
    """
    log.info('checking regions for table \'%s\'', self.table)
    regions = None
    try:
        table = self.conn.table(self.table)
        regions = table.regions()
    except HBaseIOError as _:
        #if 'org.apache.hadoop.hbase.TableNotFoundException' in _.message:
        if 'TableNotFoundException' in _.message:
            qquit('CRITICAL', 'table \'{0}\' does not exist'.format(self.table))
        else:
            qquit('CRITICAL', _)
    except (socket.error, socket.timeout, ThriftException) as _:
        qquit('CRITICAL', _)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(regions))
    if not regions:
        qquit('CRITICAL', 'failed to get regions for table \'{0}\''.format(self.table))
    if not isList(regions):
        qquit('UNKNOWN', 'region info returned is not a list! ' + support_msg_api())
    num_regions = len(regions)
    log.info('num regions: %s', num_regions)
    self.msg = 'HBase table \'{0}\' has {1} region{2}'.format(self.table, num_regions, plural(num_regions))
    self.check_thresholds(num_regions)
    num_unassigned_regions = 0
    for region in regions:
        try:
            # an empty server_name means the region is unassigned
            if not region['server_name']:
                #log.debug('region \'%s\' is not assigned to any server', region['name'])
                num_unassigned_regions += 1
        except KeyError as _:
            qquit('UNKNOWN', 'failed to find server assigned to region. ' + support_msg_api())
    log.info('num unassigned regions: %s', num_unassigned_regions)
    self.msg += ', {0} unassigned region{1}'.format(num_unassigned_regions, plural(num_unassigned_regions))
    if num_unassigned_regions > 0:
        self.warning()
        self.msg += '!'
    self.msg += ' |'
    self.msg += ' num_regions={0}'.format(num_regions) + self.get_perf_thresholds(boundary='lower')
    # hard-coded warn=1 crit=0 perfdata thresholds for unassigned regions
    self.msg += ' num_unassigned_regions={0};1;0'.format(num_unassigned_regions)
    log.info('finished, closing connection')
    self.conn.close()
def extract_response_message(response_dict):
    """Return '<responseCode>: <responseMessage>. ' extracted from a parsed
    API response dict, or '' (with a logged warning) if the status fields
    are missing.
    """
    try:
        return '{0}: {1}. '.format(response_dict['status']['responseCode'],
                                   response_dict['status']['responseMessage'])
    except KeyError:
        # bug fix: log.warn is a deprecated alias - use log.warning
        log.warning('failed to extract responseCode/responseMessage for additional error information. ' \
                    + support_msg_api())
        return ''
def parse_json(self, json_data):
    """Validate Jenkins' numExecutors field, check it against thresholds,
    and append the count plus lower-boundary perfdata to self.msg.

    Raises UnknownError if the field is not an integer.
    """
    executors = json_data['numExecutors']
    if not isInt(executors):
        raise UnknownError('non-integer returned by Jenkins. {0}'.format(support_msg_api()))
    self.msg += '{:d}'.format(executors)
    self.check_thresholds(executors)
    perfdata = ' | num_executors={0:d}'.format(executors)
    perfdata += self.get_perf_thresholds(boundary='lower')
    self.msg += perfdata
def parse_json(self, json_data):
    """Calculate the HDFS block imbalance percentage across datanodes from
    the NameNode JMX LiveNodes bean and check it against thresholds.

    Imbalance = (max_blocks - min_blocks) / min_blocks * 100, with the
    divisor floored to 1 to avoid division by zero. Emits perfdata for the
    imbalance, datanode count and min/max blocks.

    Raises CriticalError if no live datanodes are returned and
    UnknownError on parse errors.
    """
    log.info('parsing response')
    try:
        live_nodes = json_data['beans'][0]['LiveNodes']
        # LiveNodes is itself a JSON-encoded string inside the JMX bean
        live_node_data = json.loads(live_nodes)
        num_datanodes = len(live_node_data)
        if num_datanodes < 1:
            raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                .format(self.host, self.port))
        max_blocks = 0
        min_blocks = None
        for datanode in live_node_data:
            blocks = live_node_data[datanode]['numBlocks']
            if not isInt(blocks):
                raise UnknownError('numBlocks is not an integer! {0}'.format(support_msg_api()))
            blocks = int(blocks)
            log.info("datanode '%s' has %s blocks", datanode, blocks)
            if blocks > max_blocks:
                max_blocks = blocks
            if min_blocks is None or blocks < min_blocks:
                min_blocks = blocks
        log.info("max blocks on a single datanode = %s", max_blocks)
        log.info("min blocks on a single datanode = %s", min_blocks)
        # guaranteed non-None since num_datanodes >= 1
        assert min_blocks is not None
        divisor = min_blocks
        if min_blocks < 1:
            log.info("min blocks < 1, resetting divisor to 1 (% will be very high)")
            divisor = 1
        # bug fix: float(divisor) guards against Python 2 integer division,
        # which silently truncated the imbalance percentage
        block_imbalance = float("{0:.2f}".format((max_blocks - min_blocks) / float(divisor) * 100))
        self.msg = '{0}% block imbalance across {1} datanode{2}'\
                   .format(block_imbalance, num_datanodes, plural(num_datanodes))
        self.ok()
        self.check_thresholds(block_imbalance)
        if self.verbose:
            self.msg += ' (min blocks = {0}, max blocks = {1})'.format(min_blocks, max_blocks)
        self.msg += " | block_imbalance={0}%".format(block_imbalance)
        self.msg += self.get_perf_thresholds()
        self.msg += " num_datanodes={0}".format(num_datanodes)
        self.msg += " min_blocks={0}".format(min_blocks)
        self.msg += " max_blocks={0}".format(max_blocks)
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                           .format(self.host, self.port, _))
def parse_json(self, json_data):
    """Check the status of a Jenkins job's last completed build.

    With --list, prints job names and exits UNKNOWN. When the response is
    the job object (contains lastCompletedBuild), re-queries the specific
    build and recurses via process_json(). Otherwise checks build result,
    duration (against thresholds) and age (against self.age), appending
    perfdata.
    """
    if self.list_jobs:
        print('Jenkins Jobs:\n')
        for job in json_data['jobs']:
            print(job['name'])
        sys.exit(ERRORS['UNKNOWN'])
    if 'lastCompletedBuild' in json_data:
        last_completed_build = json_data['lastCompletedBuild']
        if not last_completed_build:
            raise WarningError("job '{job}' not built yet".format(job=self.job))
        # re-query for the specific build's details, then recurse
        self.path = '/job/{job}/{number}/api/json'.format(job=self.job,
                                                          number=last_completed_build['number'])
        req = self.query()
        self.process_json(req.content)
        return
    displayname = json_data['displayName']
    duration = json_data['duration']
    if not isInt(duration):
        raise UnknownError('duration field returned non-integer! {0}'.format(support_msg_api()))
    # milliseconds -> seconds
    # NOTE(review): true division on Python 3 yields a float here - confirm
    # the ':d' style perfdata consumers accept that
    duration = int(duration) / 1000
    result = json_data['result']
    timestamp = json_data['timestamp']
    if not isInt(timestamp):
        raise UnknownError('timestamp field returned non-integer! {0}'.format(support_msg_api()))
    timestamp = int(timestamp)
    building = json_data['building']
    self.msg += "build {build} status: ".format(build=displayname)
    if building:
        self.unknown()
        self.msg += 'STILL BUILDING!'
        return
    self.msg += result
    if result != 'SUCCESS':
        self.critical()
    self.msg += ', duration={duration} secs'.format(duration=duration)
    self.check_thresholds(duration)
    # timestamp is epoch milliseconds
    age = time.time() - (timestamp/1000)
    self.msg += ', age={age} secs'.format(age=sec2human(age))
    if age < 0:
        # clock skew between this host and the Jenkins server
        self.warning()
        self.msg += ' (< 0!)'
    if self.age and age > self.age:
        self.critical()
        self.msg += ' (> {0:d})'.format(self.age)
    self.msg += ' | build_duration={duration}s{perf_thresholds}'.format(duration=duration,
                                                                        perf_thresholds=self.get_perf_thresholds())
def parse_json(self, json_data):
    """Check Logstash hot threads CPU usage: either the single hottest
    thread's percentage or (with --top-3) the sum of the top 3, against
    thresholds, with thread name/state and optional traces in the output.

    Raises UnknownError for a non-dict response or threads out of the
    expected descending-percentage order.
    """
    if not isDict(json_data):
        raise UnknownError('non-dict returned for hot threads. {}'.format(support_msg_api()))
    hot_threads = json_data['hot_threads']['threads']
    top_3 = self.get_opt('top_3')
    sum_percent = 0
    last_percent = None
    for thread in hot_threads:
        thread_percent = thread['percent_of_cpu_time']
        if last_percent is None:
            last_percent = thread_percent
        # NOTE(review): last_percent is never updated inside the loop, so
        # every thread is compared against the FIRST thread's percentage,
        # not the previous one - confirm whether this is intended
        if thread_percent > last_percent:
            raise UnknownError('assertion failure - subsequent thread percent is unexpectedly higher' + \
                               ', out of expected order. {}'.format(support_msg_api()))
        sum_percent += thread_percent
    self.msg = 'Logstash '
    if top_3:
        self.msg += 'top 3 hot threads cpu percentage = {}%'.format(sum_percent)
        self.check_thresholds(sum_percent)
        self.msg += ', '
    # they come sorted with highest at top
    top_thread = hot_threads[0]
    name = top_thread['name']
    percent = top_thread['percent_of_cpu_time']
    state = top_thread['state']
    # not available in 5.0, only later versions such as 6.0
    #thread_id = top_thread['thread_id']
    self.msg += 'top hot thread \'{}\' cpu percentage = {}%'.format(name, percent)
    if not top_3:
        self.check_thresholds(percent)
    self.msg += ', state = \'{}\''.format(state)
    #self.msg += ', id = {}'.format(state, thread_id)
    if self.verbose:
        if not isList(top_thread['traces']):
            raise UnknownError('hot thread\'s trace field is not a list. {}'.format(support_msg_api()))
        # join with literal '\n' so traces stay on one Nagios output line
        traces = '\\n'.join(top_thread['traces'])
        self.msg += ', traces: {}'.format(traces)
    if not top_3:
        self.msg += ', top 3 hot threads cpu percentage = {}%'.format(sum_percent)
    self.msg += ' | top_hot_thread_cpu_percentage={}%'.format(percent)
    # perf thresholds attach to whichever metric was threshold-checked above
    if not top_3:
        self.msg += '{}'.format(self.get_perf_thresholds())
    self.msg += ' top_three_hot_thread_cpu_percentage={}%'.format(sum_percent)
    if top_3:
        self.msg += '{}'.format(self.get_perf_thresholds())
def parse_json(self, json_data):
    """Check Jenkins' useSecurity flag, going CRITICAL when security is
    disabled. Appends the boolean value to self.msg.

    Raises UnknownError if the field is not a boolean.
    """
    security_enabled = json_data['useSecurity']
    if not isinstance(security_enabled, bool):
        raise UnknownError('non-boolean returned by Jenkins. {0}'.format(support_msg_api()))
    self.msg += '{0}'.format(security_enabled)
    if security_enabled:
        return
    self.msg += ' (expected \'True\')'
    self.critical()