def parse_json(self, json_data):
    """
    Parse the Selenium Hub status JSON and set status message + perfdata.

    Counts queued new session requests, optionally restricted to those
    requesting the browser given by self.browser, then applies thresholds
    to the queue size.
    """
    items = json_data['value']
    if not isList(items):
        raise UnknownError('non-list items returned by API. {}'.format(
            support_msg_api()))
    #queue_size = len(items)
    queue_size = 0
    for item in items:
        if self.browser:
            matches_browser = False
            if not isList(item):
                raise UnknownError(
                    'non-list item returned by API. {}'.format(
                        support_msg_api()))
            # each queue item is a list of requested capability dicts -
            # iterate THIS item's capabilities (the original iterated the
            # outer 'items' list here, which is a bug)
            for _ in item:
                if _['browserName'].lower() == self.browser.lower():
                    matches_browser = True
                    break
            # skip queue items not requesting the filtered browser
            if not matches_browser:
                continue
        # count every remaining item - the original only incremented when a
        # browser filter was set and did NOT match, so an unfiltered run
        # always reported a queue size of zero and a filtered run counted
        # the non-matching items instead of the matching ones
        queue_size += 1
    self.ok()
    self.msg = 'Selenium Hub '
    if self.browser:
        self.msg += "'{}' ".format(self.browser)
    self.msg += 'queue size = {}'.format(queue_size)
    self.check_thresholds(queue_size)
    self.msg += ' | queue_size={}{}'\
                .format(queue_size, self.get_perf_thresholds())
def run(self):
    """
    Query the Blue Talon Policy Management API for resources and check the
    total resource count across all resource domains against thresholds.

    Quits CRITICAL on connection errors or non-200 responses, UNKNOWN on
    unparseable output.
    """
    log.info('querying %s', self.software)
    url = '{protocol}://{host}:{port}/PolicyManagement/{api_version}/resources'\
          .format(host=self.host,
                  port=self.port,
                  api_version=self.api_version,
                  protocol=self.protocol)
    log.debug('GET %s', url)
    try:
        req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
    except requests.exceptions.RequestException as _:
        errhint = ''
        # str(_) instead of _.message - exception .message was removed in
        # Python 3 and would itself raise AttributeError inside this handler
        if 'BadStatusLine' in str(_):
            errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
        elif self.protocol == 'https' and 'unknown protocol' in str(_):
            errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
        qquit('CRITICAL', str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        qquit('CRITICAL', '{0}: {1}'.format(req.status_code, req.reason))
    try:
        json_dict = json.loads(req.content)
        if log.isEnabledFor(logging.DEBUG):
            print(jsonpp(json_dict))
            print('=' * 80)
        if not isDict(json_dict):
            raise ValueError("non-dict returned by Blue Talon API (got type '{0}')".format(type(json_dict)))
        resource_domains_list = json_dict['resource_domains']
        if not isList(resource_domains_list):
            raise ValueError("non-list returned for 'resource_domains' key by Blue Talon API (got type '{0}')"\
                             .format(type(resource_domains_list)))
        num_resource_domains = len(resource_domains_list)
        # sum resources across all domains
        num_resources = 0
        for resource_domain in resource_domains_list:
            resources = resource_domain['resources']
            if not isList(resources):
                raise ValueError("non-list found for resources in resource_domain '{0}' (got type '{1}'"\
                                 .format(resource_domain['resource_domain_name'], type(resources)))
            num_resources += len(resources)
        self.msg += '{num_resources} resources'.format(num_resources=num_resources)
        self.check_thresholds(num_resources)
        self.msg += ' across {num_resource_domains} resource domains'\
                    .format(num_resource_domains=num_resource_domains)
        self.msg += ' | num_resources={num_resources}{perf} num_resource_domains={num_resource_domains}'\
                    .format(num_resources=num_resources,
                            num_resource_domains=num_resource_domains,
                            perf=self.get_perf_thresholds())
    except (KeyError, ValueError) as _:
        qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
              .format(software=self.software,
                      exception=type(_).__name__,
                      error=_,
                      support_msg=support_msg_api()))
def run(self):
    """
    Query the Blue Talon Policy Management API resources endpoint and check
    the number of resources across all resource domains against thresholds.

    Quits CRITICAL on connection errors or non-200 responses, UNKNOWN on
    unparseable output.
    """
    log.info('querying %s', self.software)
    url = '{protocol}://{host}:{port}/PolicyManagement/{api_version}/resources'\
          .format(host=self.host,
                  port=self.port,
                  api_version=self.api_version,
                  protocol=self.protocol)
    log.debug('GET %s', url)
    try:
        req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
    except requests.exceptions.RequestException as _:
        errhint = ''
        # use str(_) rather than _.message: exception .message was removed
        # in Python 3 and would raise AttributeError inside this handler
        if 'BadStatusLine' in str(_):
            errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
        elif self.protocol == 'https' and 'unknown protocol' in str(_):
            errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
        qquit('CRITICAL', str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
    if req.status_code != 200:
        qquit('CRITICAL', '{0}: {1}'.format(req.status_code, req.reason))
    try:
        json_dict = json.loads(req.content)
        if log.isEnabledFor(logging.DEBUG):
            print(jsonpp(json_dict))
            print('='*80)
        if not isDict(json_dict):
            raise ValueError("non-dict returned by Blue Talon API (got type '{0}')".format(type(json_dict)))
        resource_domains_list = json_dict['resource_domains']
        if not isList(resource_domains_list):
            raise ValueError("non-list returned for 'resource_domains' key by Blue Talon API (got type '{0}')"\
                             .format(type(resource_domains_list)))
        num_resource_domains = len(resource_domains_list)
        # total up the resources in every domain
        num_resources = 0
        for resource_domain in resource_domains_list:
            resources = resource_domain['resources']
            if not isList(resources):
                raise ValueError("non-list found for resources in resource_domain '{0}' (got type '{1}'"\
                                 .format(resource_domain['resource_domain_name'], type(resources)))
            num_resources += len(resources)
        self.msg += '{num_resources} resources'.format(num_resources=num_resources)
        self.check_thresholds(num_resources)
        self.msg += ' across {num_resource_domains} resource domains'\
                    .format(num_resource_domains=num_resource_domains)
        self.msg += ' | num_resources={num_resources}{perf} num_resource_domains={num_resource_domains}'\
                    .format(num_resources=num_resources,
                            num_resource_domains=num_resource_domains,
                            perf=self.get_perf_thresholds())
    except (KeyError, ValueError) as _:
        qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
              .format(software=self.software,
                      exception=type(_).__name__,
                      error=_,
                      support_msg=support_msg_api()))
def parse_json(self, json_data):
    """
    Check Presto worker nodes' last response timestamps, counting nodes
    whose last response is older than self.max_age seconds and applying
    thresholds to that count.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
    strip_scheme = re.compile('^https?://')
    lagging_uris = []
    oldest_age = 0
    for node in json_data:
        last_seen = datetime.strptime(node['lastResponseTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
        age_secs = int((datetime.utcnow() - last_seen).total_seconds())
        if age_secs > oldest_age:
            oldest_age = age_secs
        if age_secs > self.max_age:
            # record the lagging node's uri minus the http(s):// scheme
            lagging_uris.append(strip_scheme.sub('', node['uri']))
            log.info("node '%s' last response age %d secs > max age %s secs",
                     node['uri'], age_secs, self.max_age)
        else:
            log.info("node '%s' last response age %d secs", node['uri'], age_secs)
    lagging_count = len(lagging_uris)
    self.msg = 'Presto SQL worker nodes with response timestamps older than {0:d} secs = {1:d}'\
               .format(self.max_age, lagging_count)
    self.check_thresholds(lagging_count)
    self.msg += ', current max response age = {0:.2f} secs'.format(oldest_age)
    if self.verbose and lagging_uris:
        self.msg += ' [{0}]'.format(', '.join(lagging_uris))
    self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                .format(lagging_count, self.get_perf_thresholds(), oldest_age)
def extract_value(self, content):
    """
    Extract and base64-decode the 'Value' field from a Consul key query
    JSON response, raising UnknownError on any unexpected format.
    """
    json_data = None
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    value = None
    if not isList(json_data):
        raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        raise UnknownError("more than one key returned by consul! response = '%s'. %s" \
                           % (content, support_msg_api()))
    try:
        value = json_data[0]['Value']
    except KeyError:
        raise UnknownError(
            "couldn't find field 'Value' in response from consul: '%s'. %s" % (content, support_msg_api()))
    try:
        # base64.decodestring was deprecated and REMOVED in Python 3.9 -
        # prefer decodebytes where available, keep decodestring as the
        # fallback for Python 2.7 which lacks decodebytes
        if isinstance(value, str):
            # decodebytes requires a bytes-like object on Python 3
            value = value.encode('utf-8')
        decode = getattr(base64, 'decodebytes', None)
        if decode is None:
            decode = base64.decodestring  # pylint: disable=deprecated-method
        value = decode(value)
    except (TypeError, ValueError):
        # binascii.Error (invalid base64) subclasses ValueError on Python 3
        raise UnknownError(
            "invalid data returned for key '{0}' value = '{1}', failed to base64 decode"
            .format(self.key, value))
    return value
def parse_json(self, json_data):
    """
    Locate the target Ranger repository in the JSON response and check it.

    Handles three modes: targeted --id query (the response body IS the
    repository), --list output (prints and exits), and lookup by --name
    within the repository list.
    """
    repository = None
    if self.repository_id:
        # targeted query - the whole response is the repository
        repository = json_data
        repository_list = [repository]
    if not self.repository_id or self.list_repositories:
        repository_list = json_data['vXRepositories']
        if not repository_list:
            raise CriticalError('Ranger repository not found! (check the --name is correct and that it really exists)')
    host_info = ''
    if self.verbose:
        host_info = " at '{0}:{1}'".format(self.host, self.port)
    if not isList(repository_list):
        raise UnknownError("non-list returned for json_data[vXRepositories] by Ranger{0}".format(host_info))
    if self.list_repositories:
        self.print_repositories(repository_list)
        sys.exit(ERRORS['UNKNOWN'])
    if repository is None and self.repository_name:
        for candidate in repository_list:
            if candidate['name'] == self.repository_name:
                repository = candidate
                break
    # this won't apply when --id is given as it's a targeted query that will get 404 before this
    # will only apply to --name based queries
    if not repository:
        raise CriticalError("no matching repository found with name '{name}' in repository list "
                            .format(name=self.repository_name) +
                            "returned by Ranger{host_info}".format(host_info=host_info))
    self.check_repository(repository)
def parse_json(self, json_data):
    """
    Check available vs total Selenium Hub 4.x node counts, optionally
    counting only nodes that advertise a slot for self.browser.
    """
    data = json_data['value']
    try:
        nodes = data['nodes']
    except KeyError:
        raise UnknownError('nodes field not found, are you trying to run this on an old ' +
                           'Selenium Hub <= 3.x or Selenoid? That information is not available in those APIs')
    if not isList(nodes):
        raise UnknownError('nodes field is not a list as expected. {}'.format(support_msg_api()))
    total_nodes = 0
    available_nodes = 0
    for node in nodes:
        if self.browser:
            wanted = self.browser.lower()
            # only count nodes that have at least one slot for the browser
            if not any(slot['stereotype']['browserName'].lower() == wanted
                       for slot in node['slots']):
                continue
        total_nodes += 1
        if node['availability'] == 'UP':
            available_nodes += 1
    self.ok()
    self.msg = 'Selenium Hub '
    if self.browser:
        self.msg += "'{}' ".format(self.browser)
    self.msg += 'nodes available = {}/{}'.format(available_nodes, total_nodes)
    self.check_thresholds(available_nodes)
    self.msg += ' | nodes_available={}{} nodes_total={}'\
                .format(available_nodes, self.get_perf_thresholds(boundary='lower'), total_nodes)
def parse_json(self, json_data):
    """
    Check Presto worker node last response ages: count nodes lagging beyond
    self.max_age seconds, warn if no workers at all are found, and report
    the oldest response age.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
    strip_scheme = re.compile('^https?://')
    worker_count = len(json_data)
    lagging = []
    oldest_age = 0
    for node in json_data:
        last_seen = datetime.strptime(node['lastResponseTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
        age_secs = int((datetime.utcnow() - last_seen).total_seconds())
        if age_secs > oldest_age:
            oldest_age = age_secs
        if age_secs > self.max_age:
            # store the lagging node's uri with the http(s):// scheme removed
            lagging.append(strip_scheme.sub('', node['uri']))
            log.info("node '%s' last response age %d secs > max age %s secs",
                     node['uri'], age_secs, self.max_age)
        else:
            log.info("node '%s' last response age %d secs", node['uri'], age_secs)
    lagging_count = len(lagging)
    self.msg = 'Presto SQL - worker nodes with response timestamps older than {0:d} secs = {1:d}'\
               .format(self.max_age, lagging_count)
    self.check_thresholds(lagging_count)
    self.msg += ' out of {0:d} nodes'.format(worker_count)
    if worker_count < 1:
        self.warning()
        self.msg += ' (< 1 worker found)'
    self.msg += ', current max response age = {0:.2f} secs'.format(oldest_age)
    if self.verbose and lagging:
        self.msg += ' [{0}]'.format(', '.join(lagging))
    self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                .format(lagging_count, self.get_perf_thresholds(), oldest_age)
def parse_json(self, json_data):
    """Count Presto worker nodes and apply (lower-boundary) thresholds."""
    if not isList(json_data):
        raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
    worker_count = len(json_data)
    self.msg = 'Presto SQL worker nodes = {0}'.format(worker_count)
    self.check_thresholds(worker_count)
    self.msg += ' | num_worker_nodes={0}{1}'.format(worker_count,
                                                    self.get_perf_thresholds('lower'))
def parse_json(self, json_data):
    """
    Parse the Atlas entity JSON: optionally list entities, resolve an
    entity by --id or --name (name resolution re-queries by id via
    recursion into process_json), then check state / type / tags and
    build the status message.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Atlas metadata server instance at {0}:{1}! {2}'\
                           .format(self.host, self.port, support_msg_api()))
    if len(json_data) < 1:
        raise CriticalError('no entities found!')
    if self.list_entities:
        # print a formatted table of all entities and exit UNKNOWN
        print('=' * 100)
        print('{0:40} {1:25} {2}'.format('ID', 'Type', 'Name'))
        print('=' * 100)
        for entity in json_data:
            name = self.get_key(entity, 'name')
            _id = self.get_key(entity, 'id')
            _type = self.get_key(entity, 'type')
            print('{0:40} {1:25} {2}'.format(_id, _type, name))
        sys.exit(ERRORS['UNKNOWN'])
    if self.entity_id:
        # a targeted --id query should return exactly one entity
        if len(json_data) > 1:
            raise CriticalError('more than one matching entity returned!')
        json_data = json_data[0]
    elif self.entity_name:
        for entity in json_data:
            if self.entity_name == self.get_key(entity, 'name'):
                # Recursion - a bit too clever but convenient
                # switch to an --id style query and re-process
                self.entity_name = None
                self.entity_id = self.get_key(entity, 'id')
                self.path += '/{0}'.format(self.entity_id)
                req = self.query()
                self.process_json(req.content)
                # escape recursion
                return
        raise CriticalError("entity with name '{name}' not found!".format(name=self.entity_name))
    name = self.get_key(json_data, 'name')
    state = self.get_key(json_data, 'state')
    # available for HDFS path but not DB
    #path = self.get_key(json_data, 'path')
    _type = self.get_key(json_data, 'type')
    tags = []
    if 'trait_names' in json_data:
        tags = self.get_key(json_data, 'trait_names')
    #traits = self.get_key(json_data, 'traits')
    version = self.get_key(json_data, 'version')
    modified_date = self.get_key(json_data, 'modified_time')
    self.msg += " '{name}' exists, state='{state}'".format(name=name, state=state)
    # any state other than ACTIVE is treated as critical
    if state != 'ACTIVE':
        self.critical()
        self.msg += " (expected 'ACTIVE')"
    self.msg += ", type='{type}'".format(type=_type)
    self.check_type(_type)
    #if self.verbose:
    self.msg += ", tags='{tags}'".format(tags=','.join(tags))
    self.check_missing_tags(tags)
    #if self.verbose:
    #self.msg += ", traits='{traits}'".format(traits=','.join(traits))
    #self.check_missing_traits(traits)
    if self.verbose:
        self.msg += ", modified_date='{modified_date}', version='{version}'".format(
            modified_date=modified_date,
            version=version
        )
def parse_json(self, json_data):
    """
    Handle the CouchDB response: with --list, print all databases and exit
    UNKNOWN; otherwise verify the queried database exists by comparing the
    returned db_name from the direct /{db} call.
    """
    if self.get_opt('list'):
        if not isList(json_data):
            raise UnknownError(
                'non-list returned by CouchDB for databases')
        print('CouchDB databases:\n')
        if json_data:
            for database in json_data:
                print('{0}'.format(database))
        else:
            print('<none>')
        sys.exit(ERRORS['UNKNOWN'])
    # existence is checked via the direct /{db} call rather than scanning
    # the database list; HTTP error handling is wrapped further up in the
    # class hierarchy
    if self.is_ok():
        if json_data['db_name'] != self.database:
            raise UnknownError('db_name {} != {}'.format(
                json_data['db_name'], self.database))
        self.msg += 'exists'
def gen_payload(self, services=None):
    """
    Generate the Ambari request-schedule JSON payload that runs a service
    check for each given service in a batch (1 second apart, tolerating
    1 task failure).

    services - list of service names, or None / 'all' to check every
               service returned by self.get_services()
    Returns the payload serialized as a JSON string.
    """
    log.debug('generating payload for services: %s', services)
    if services is None or services == 'all':
        services = self.get_services()
    if not isList(services):
        code_error('non-list passed to gen_payload')
    # determined from here:
    # https://community.hortonworks.com/questions/11111/is-there-a-way-to-execute-ambari-service-checks-in.html
    payload = [
        {
            "RequestSchedule": {
                "batch": [
                    {
                        "requests": []
                    },
                    {
                        "batch_settings": {
                            "batch_separation_in_seconds": 1,
                            "task_failure_tolerance": 1
                        }
                    }
                ]
            }
        }
    ]
    service_count = len(services)
    # enumerate from 1 replaces the original's range(len(...)) loop with
    # manual 'index += 1' arithmetic
    for index, service in enumerate(services, 1):
        if service.upper() == "ZOOKEEPER":
            # ZOOKEEPER service check command name is irregular ZOOKEEPER_QUORUM_SERVICE_CHECK, not ZOOKEEPER_SERVICE_CHECK
            command_data = "{service}_QUORUM_SERVICE_CHECK".format(service=service.upper())
        else:
            command_data = "{service}_SERVICE_CHECK".format(service=service.upper())
        payload[0]['RequestSchedule']['batch'][0]['requests'].append(
            {
                "order_id": index,
                "type": "POST",
                "uri": "/api/v1/clusters/{0}/requests".format(self.cluster),
                "RequestBodyInfo": {
                    "RequestInfo": {
                        # command_data is already a string - the original's
                        # "{commandData}".format(...) wrapper was redundant
                        "command": command_data,
                        "context": "{service} Service Check (batch {index} of {total})".
                                   format(service=service, index=index, total=service_count)
                    },
                    "Requests/resource_filters": [
                        {
                            "service_name": service.upper()
                        }
                    ]
                }
            }
        )
    payload_str = json.dumps(payload)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('generated payload:\n%s', jsonpp(payload_str))
    return payload_str
def parse_json(self, json_data):
    """
    Check CouchDB database stats: doc count (thresholded), deleted doc
    count, data size and whether compaction is running. With --list, print
    the databases and exit UNKNOWN instead.
    """
    if self.get_opt('list'):
        if not isList(json_data):
            raise UnknownError(
                'non-list returned by CouchDB for databases')
        databases = json_data
        print('CouchDB databases:\n')
        if databases:
            for database in databases:
                print('{0}'.format(database))
        else:
            print('<none>')
        sys.exit(ERRORS['UNKNOWN'])
    # raise instead of the original bare assert - asserts are stripped under
    # 'python -O' and AssertionError gives a poor operator-facing message;
    # this matches the UnknownError used by the sibling CouchDB checks
    if json_data['db_name'] != self.database:
        raise UnknownError('db_name {} != {}'.format(json_data['db_name'], self.database))
    doc_count = json_data['doc_count']
    doc_del_count = json_data['doc_del_count']
    data_size = json_data['data_size']
    compact_running = json_data['compact_running']
    self.msg += "'{0}' doc count = {1}".format(self.database, doc_count)
    self.check_thresholds(doc_count)
    self.msg += ', doc del count = {0}'.format(doc_del_count)
    self.msg += ', data size = {0}'.format(humanize.naturalsize(data_size))
    self.msg += ', compact running = {0}'.format(compact_running)
    self.msg += ' | doc_count={0}{1} doc_del_count={2} data_size={3}b compact_running={4}'\
                .format(doc_count, self.get_perf_thresholds(), doc_del_count, data_size, int(compact_running))
def extract_value(self, content):
    """
    Extract and base64-decode the 'Value' field from a Consul key query
    JSON response, raising UnknownError on any unexpected format.
    """
    json_data = None
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    value = None
    if not isList(json_data):
        raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        raise UnknownError("more than one key returned by consul! response = '%s'. %s" \
                           % (content, support_msg_api()))
    try:
        value = json_data[0]['Value']
    except KeyError:
        raise UnknownError("couldn't find field 'Value' in response from consul: '%s'. %s"
                           % (content, support_msg_api()))
    try:
        # base64.decodestring was deprecated and REMOVED in Python 3.9 -
        # use decodebytes where available, keeping decodestring as the
        # fallback for Python 2.7 which lacks decodebytes
        if isinstance(value, str):
            # decodebytes requires a bytes-like object on Python 3
            value = value.encode('utf-8')
        decode = getattr(base64, 'decodebytes', None)
        if decode is None:
            decode = base64.decodestring  # pylint: disable=deprecated-method
        value = decode(value)
    except (TypeError, ValueError):
        # binascii.Error (invalid base64) subclasses ValueError on Python 3
        raise UnknownError("invalid data returned for key '{0}' value = '{1}', failed to base64 decode"
                           .format(self.key, value))
    return value
def parse_json(self, json_data):
    """
    Locate the target Ranger policy in the JSON response and check it.

    Handles three modes: targeted --policy-id query (the response body IS
    the policy), --list output (prints and exits), and lookup by --name
    within the policy list.
    """
    policy = None
    if self.policy_id:
        # targeted query - the whole response is the policy
        policy = json_data
        policy_list = [policy]
    if not self.policy_id or self.list_policies:
        policy_list = json_data['vXPolicies']
        if not policy_list:
            raise CriticalError(
                'Ranger policy not found! (check the --name is correct and that it really exists)'
            )
    host_info = ''
    if self.verbose:
        host_info = " at '{0}:{1}'".format(self.host, self.port)
    if not isList(policy_list):
        raise UnknownError("non-list returned for json_data[vXPolicies] by Ranger{0}".format(host_info))
    if self.list_policies:
        self.print_policies(policy_list)
        sys.exit(ERRORS['UNKNOWN'])
    if policy is None and self.policy_name:
        for candidate in policy_list:
            if candidate['policyName'] == self.policy_name:
                policy = candidate
                break
    # this won't apply when --policy-id is given as it's a targeted query that will get 404 before this
    if not policy:
        raise CriticalError("no matching policy found with name '{name}' in policy list "
                            .format(name=self.policy_name) +
                            "returned by Ranger{host_info}".format(host_info=host_info))
    self.check_policy(policy)
def extract_value(self, content):
    """
    Extract and base64-decode the 'Value' field from a Consul key read
    response, quitting UNKNOWN on any unexpected format.
    """
    json_data = None
    try:
        json_data = json.loads(content)
    except ValueError:
        qquit('UNKNOWN', "non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    value = None
    if not isList(json_data):
        qquit('UNKNOWN', "non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        qquit('UNKNOWN', "blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        qquit('UNKNOWN', "more than one key returned by consul! response = '%s'. %s" \
              % (content, support_msg_api()))
    try:
        value = json_data[0]['Value']
    except KeyError:
        qquit('UNKNOWN', "couldn't find field 'Value' in response from consul: '%s'. %s" \
              % (content, support_msg_api()))
    try:
        # base64.decodestring was deprecated and REMOVED in Python 3.9 -
        # use decodebytes where available, falling back for Python 2.7
        if isinstance(value, str):
            # decodebytes requires a bytes-like object on Python 3
            value = value.encode('utf-8')
        decode = getattr(base64, 'decodebytes', None)
        if decode is None:
            decode = base64.decodestring  # pylint: disable=deprecated-method
        value = decode(value)
    except (TypeError, ValueError):
        # binascii.Error (invalid base64) subclasses ValueError on Python 3.
        # Original formatted with "%(key)s ..." % locals(), but no local
        # named 'key' exists (it is self.key), so the error path itself
        # raised KeyError - use explicit formatting instead
        qquit('UNKNOWN', "invalid data returned for key '{0}' value = '{1}', failed to base64 decode"
              .format(self.key, value))
    return value
def extract_value(self, content):
    """
    Extract and base64-decode the 'Value' field from a Consul key read
    response, quitting UNKNOWN on any unexpected format.
    """
    json_data = None
    try:
        json_data = json.loads(content)
    except ValueError:
        qquit(
            'UNKNOWN',
            "non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    value = None
    if not isList(json_data):
        qquit(
            'UNKNOWN',
            "non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        qquit(
            'UNKNOWN',
            "blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        qquit('UNKNOWN', "more than one key returned by consul! response = '%s'. %s" \
              % (content, support_msg_api()))
    try:
        value = json_data[0]['Value']
    except KeyError:
        qquit('UNKNOWN', "couldn't find field 'Value' in response from consul: '%s'. %s" \
              % (content, support_msg_api()))
    try:
        # base64.decodestring was deprecated and REMOVED in Python 3.9 -
        # use decodebytes where available, falling back for Python 2.7
        if isinstance(value, str):
            # decodebytes requires a bytes-like object on Python 3
            value = value.encode('utf-8')
        decode = getattr(base64, 'decodebytes', None)
        if decode is None:
            decode = base64.decodestring  # pylint: disable=deprecated-method
        value = decode(value)
    except (TypeError, ValueError):
        # binascii.Error (invalid base64) subclasses ValueError on Python 3.
        # Original formatted with "%(key)s ..." % locals(), but no local
        # named 'key' exists (it is self.key), so the error path itself
        # raised KeyError - use explicit formatting instead
        qquit('UNKNOWN', "invalid data returned for key '{0}' value = '{1}', failed to base64 decode"
              .format(self.key, value))
    return value
def parse_json(self, json_data):
    """
    Check an Apache Drill storage plugin: verify it exists, is enabled,
    and optionally matches the expected --type. With --list, print all
    plugins and exit UNKNOWN.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned for storage plugins. {}'.format(support_msg_api()))
    if self.get_opt('list'):
        print('Apache Drill storage plugins:\n')
        print('=' * 50)
        print('%-10s\t%-10s\t%s' % ('Name', 'Type', 'Enabled'))
        print('=' * 50 + '\n')
        for plugin in json_data:
            plugin_config = plugin['config']
            print('%-10s\t%-10s\t%s' % (plugin['name'],
                                        plugin_config['type'],
                                        plugin_config['enabled']))
        sys.exit(ERRORS['UNKNOWN'])
    config = None
    for plugin in json_data:
        if plugin['name'] == self.storage_plugin:
            config = plugin['config']
            plugin_type = config['type']
            enabled = config['enabled']
            break
    if not config:
        raise CriticalError("Apache Drill storage plugin '{}' not found! See --list for available plugins!"
                            .format(self.storage_plugin))
    self.msg = "Apache Drill storage plugin '{}' enabled = {}, plugin type = '{}'"\
               .format(self.storage_plugin, enabled, plugin_type)
    if not enabled:
        self.critical()
    _type = self.get_opt('type')
    if _type and _type != plugin_type:
        self.critical()
        self.msg += " (expected '{}')".format(_type)
def parse_json(self, json_data):
    """
    Find the target Ranger policy in the JSON response and hand it to
    check_policy(). Supports targeted --policy-id queries (response body
    is the policy), --list output, and lookup by --name.
    """
    policy = None
    if self.policy_id:
        # targeted query returns the policy directly
        policy = json_data
        policy_list = [policy]
    if not self.policy_id or self.list_policies:
        policy_list = json_data['vXPolicies']
        if not policy_list:
            raise CriticalError('Ranger policy not found! (check the --name is correct and that it really exists)')
    host_info = ''
    if self.verbose:
        host_info = " at '{0}:{1}'".format(self.host, self.port)
    if not isList(policy_list):
        raise UnknownError("non-list returned for json_data[vXPolicies] by Ranger{0}".format(host_info))
    if self.list_policies:
        self.print_policies(policy_list)
        sys.exit(ERRORS['UNKNOWN'])
    if policy is None and self.policy_name:
        for item in policy_list:
            if item['policyName'] == self.policy_name:
                policy = item
                break
    # this won't apply when --policy-id is given as it's a targeted query that will get 404 before this
    if not policy:
        raise CriticalError("no matching policy found with name '{name}' in policy list "
                            .format(name=self.policy_name) +
                            "returned by Ranger{host_info}".format(host_info=host_info))
    self.check_policy(policy)
def parse_json(self, json_data):
    """
    With --list, print all CouchDB databases and exit UNKNOWN. Otherwise
    confirm the queried database exists by checking the db_name returned
    by the direct /{db} call.
    """
    if self.get_opt('list'):
        if not isList(json_data):
            raise UnknownError('non-list returned by CouchDB for databases')
        print('CouchDB databases:\n')
        if json_data:
            for db_name in json_data:
                print('{0}'.format(db_name))
        else:
            print('<none>')
        sys.exit(ERRORS['UNKNOWN'])
    # existence is verified via the direct /{db} endpoint rather than the
    # database list; HTTP exception handling lives further up the class
    # hierarchy
    if self.is_ok():
        if json_data['db_name'] != self.database:
            raise UnknownError('db_name {} != {}'.format(json_data['db_name'], self.database))
        self.msg += 'exists'
def get_tables(self):
    """
    Return the list of HBase tables from the Thrift connection, quitting
    UNKNOWN on an unexpected response type and CRITICAL on connection
    errors.
    """
    try:
        tables = self.conn.tables()
        if not isList(tables):
            qquit('UNKNOWN', 'table list returned is not a list! ' + support_msg_api())
        # the original fetched and validated the table list but never
        # returned it, leaving callers with None
        return tables
    except (socket.error, socket.timeout, ThriftException, HBaseIOError) as _:
        qquit('CRITICAL', 'error while trying to get table list: {0}'.format(_))
def parse_json(self, json_data):
    """
    Check an Apache Drill config setting against the expected value
    (regex, case-insensitive). With --list, print all settings and exit
    UNKNOWN.
    """
    if not isList(json_data):
        raise UnknownError(
            'non-list returned for config settings. {}'.format(support_msg_api()))
    if self.list_config:
        print('Apache Drill config settings:\n')
        for setting in json_data:
            print('{} = {}'.format(setting['name'], setting['value']))
        sys.exit(ERRORS['UNKNOWN'])
    value = None
    for setting in json_data:
        name = setting['name']
        if name == self.config_key:
            value = setting['value']
            break
    if value is None:
        raise UnknownError(
            "config key '{}' not found. See --list for all config keys".format(self.config_key))
    # intentionally using name instead of self.config_key to cause NameError if not set or make error more visible if wrong key match
    self.msg = "Apache Drill config '{}' = '{}'".format(name, value)
    if re.match(str(self.expected_value), str(value), re.I):
        self.ok()
    else:
        self.critical()
        self.msg += " (expected '{}')".format(self.expected_value)
def get_tables(self):
    """
    Return the list of HBase tables from the Thrift connection, quitting
    UNKNOWN on an unexpected response type and CRITICAL on connection
    errors.
    """
    try:
        tables = self.conn.tables()
        if not isList(tables):
            qquit('UNKNOWN', 'table list returned is not a list! ' + support_msg_api())
        # the original fetched and validated the table list but never
        # returned it, leaving callers with None
        return tables
    # socket.error added for consistency with the sibling get_tables block,
    # which otherwise escapes as an unhandled traceback on connection reset
    except (socket.error, socket.timeout, ThriftException, HBaseIOError) as _:
        qquit('CRITICAL', 'error while trying to get table list: {0}'.format(_))
def extract_value(self, content):  # pylint: disable=no-self-use
    """
    Extract and base64-decode the 'Value' field from a Consul key query
    JSON response, raising UnknownError on any unexpected format.
    """
    json_data = None
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    value = None
    if not isList(json_data):
        raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if len(json_data) > 1:
        raise UnknownError(
            "more than one key returned by consul! response = '%s'. %s" % (content, support_msg_api())
        )
    try:
        value = json_data[0]["Value"]
    except KeyError:
        raise UnknownError(
            "couldn't find field 'Value' in response from consul: '%s'. %s" % (content, support_msg_api())
        )
    try:
        # base64.decodestring was deprecated and REMOVED in Python 3.9 -
        # use decodebytes where available, falling back for Python 2.7
        if isinstance(value, str):
            # decodebytes requires a bytes-like object on Python 3
            value = value.encode('utf-8')
        decode = getattr(base64, 'decodebytes', None)
        if decode is None:
            decode = base64.decodestring  # pylint: disable=deprecated-method
        value = decode(value)
    except (TypeError, ValueError):
        # binascii.Error (invalid base64) subclasses ValueError on Python 3
        raise UnknownError(
            "invalid data returned for key '{0}' value = '{1}', failed to base64 decode".format(self.key, value)
        )
    return value
def parse_json(self, json_data):
    """
    Parse the Atlas entity JSON: optionally list entities, resolve an
    entity by --id or --name (name resolution re-queries by id via
    recursion into process_json), then check state / type / tags and
    build the status message.
    """
    if not isList(json_data):
        raise UnknownError('non-list returned by Atlas metadata server instance at {0}:{1}! {2}'\
                           .format(self.host, self.port, support_msg_api()))
    if len(json_data) < 1:
        raise CriticalError('no entities found!')
    if self.list_entities:
        # print a formatted table of all entities and exit UNKNOWN
        print('=' * 100)
        print('{0:40} {1:25} {2}'.format('ID', 'Type', 'Name'))
        print('=' * 100)
        for entity in json_data:
            name = self.get_key(entity, 'name')
            _id = self.get_key(entity, 'id')
            _type = self.get_key(entity, 'type')
            print('{0:40} {1:25} {2}'.format(_id, _type, name))
        sys.exit(ERRORS['UNKNOWN'])
    if self.entity_id:
        # a targeted --id query should return exactly one entity
        if len(json_data) > 1:
            raise CriticalError('more than one matching entity returned!')
        json_data = json_data[0]
    elif self.entity_name:
        for entity in json_data:
            if self.entity_name == self.get_key(entity, 'name'):
                # Recursion - a bit too clever but convenient
                # switch to an --id style query and re-process
                self.entity_name = None
                self.entity_id = self.get_key(entity, 'id')
                self.path += '/{0}'.format(self.entity_id)
                req = self.query()
                self.process_json(req.content)
                # escape recursion
                return
        raise CriticalError("entity with name '{name}' not found!".format(name=self.entity_name))
    name = self.get_key(json_data, 'name')
    state = self.get_key(json_data, 'state')
    # available for HDFS path but not DB
    #path = self.get_key(json_data, 'path')
    _type = self.get_key(json_data, 'type')
    tags = []
    if 'trait_names' in json_data:
        tags = self.get_key(json_data, 'trait_names')
    #traits = self.get_key(json_data, 'traits')
    version = self.get_key(json_data, 'version')
    modified_date = self.get_key(json_data, 'modified_time')
    self.msg = " '{name}' exists, state='{state}'".format(name=name, state=state)
    # any state other than ACTIVE is treated as critical
    if state != 'ACTIVE':
        self.critical()
        self.msg += " (expected 'ACTIVE')"
    self.msg += ", type='{type}'".format(type=_type)
    self.check_type(_type)
    #if self.verbose:
    self.msg += ", tags='{tags}'".format(tags=','.join(tags))
    self.check_missing_tags(tags)
    #if self.verbose:
    #self.msg += ", traits='{traits}'".format(traits=','.join(traits))
    #self.check_missing_traits(traits)
    if self.verbose:
        self.msg += ", modified_date='{modified_date}', version='{version}'".format(
            modified_date=modified_date,
            version=version
        )
def run(self): log.info("querying %s", self.software) url = "{protocol}://{host}:{port}/PolicyManagement/{api_version}/deployments".format( host=self.host, port=self.port, api_version=self.api_version, protocol=self.protocol ) log.debug("GET %s", url) try: req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password)) except requests.exceptions.RequestException as _: errhint = "" if "BadStatusLine" in str(_.message): errhint = " (possibly connecting to an SSL secured port without using --ssl?)" elif self.protocol == "https" and "unknown protocol" in str(_.message): errhint = " (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)" qquit("CRITICAL", str(_) + errhint) log.debug("response: %s %s", req.status_code, req.reason) log.debug("content:\n%s\n%s\n%s", "=" * 80, req.content.strip(), "=" * 80) if req.status_code == 400 and req.reason == "Bad Request": qquit( "CRITICAL", "{0}: {1} (possibly new install with no deployments yet?)".format(req.status_code, req.reason), ) if req.status_code != 200: qquit("CRITICAL", "{0}: {1}".format(req.status_code, req.reason)) try: json_list = json.loads(req.content) if log.isEnabledFor(logging.DEBUG): print(jsonpp(json_list)) print("=" * 80) if not isList(json_list): raise ValueError("returned content is not a list") if not json_list: qquit("UNKNOWN", "no deployments found") last_deployment = json_list[0] userid = last_deployment["UserId"] description = last_deployment["Description"] hostname = last_deployment["HostName"] timestamp = last_deployment["timestamp"] last_deploy_datetime = datetime.strptime(timestamp, "%b %d, %Y %H:%M:%S %p") except (KeyError, ValueError) as _: qquit( "UNKNOWN", "error parsing output from {software}: {exception}: {error}. 
{support_msg}".format( software=self.software, exception=type(_).__name__, error=_, support_msg=support_msg_api() ), ) timedelta = datetime.now() - last_deploy_datetime mins = int(int(timedelta.total_seconds()) / 60) self.msg = "{software} last deployment was at '{timestamp}', {mins} mins ago".format( software=self.software, timestamp=timestamp, mins=mins ) self.check_thresholds(mins) if self.verbose: self.msg += " by user '{userid}', host = '{hostname}', description = '{description}'".format( userid=userid, hostname=hostname, description=description ) self.msg += " | mins_since_last_deployment={mins}{thresholds}".format( mins=mins, thresholds=self.get_perf_thresholds(boundary="lower") )
def list_metrics(self):
    """Print all available Attivio perfmon metric names sorted, then exit UNKNOWN."""
    metric_names = self.get('names')
    if not isList(metric_names):
        raise ValueError("non-list returned by Attivio Perfmon host for metric names (got type '{0}'"
                         .format(type(metric_names)))
    print('Attivio metrics:\n')
    for metric_name in sorted(metric_names):
        print(metric_name)
    sys.exit(ERRORS['UNKNOWN'])
def parse_json(self, json_data): if not isList(json_data): raise UnknownError( 'non-list returned by Presto for nodes failed. {0}'.format( support_msg_api())) num_failed_nodes = len(json_data) self.msg = 'Presto SQL - {0} worker node{1} failed'.format( num_failed_nodes, plural(num_failed_nodes)) self.check_thresholds(num_failed_nodes)
def mac_get_arg(args): if not args: return '' if not isList(args): die("non-list '{args}' passed to mac_getent_passwd()".format(args=args)) if len(args) > 1: die('only one arg is supported on Mac at this time') arg = args[0] return arg
def run(self):
    """Query the Blue Talon deployments API and alert on time since the last deployment.

    GETs the deployments list with HTTP basic auth, takes the newest deployment's
    timestamp and checks the minutes elapsed against the (lower boundary) thresholds.
    Exits via qquit() on connection, HTTP or parsing errors.
    """
    log.info('querying %s', self.software)
    url = '{protocol}://{host}:{port}/PolicyManagement/{api_version}/deployments'\
          .format(host=self.host, port=self.port, api_version=self.api_version, protocol=self.protocol)
    log.debug('GET %s', url)
    try:
        req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
    except requests.exceptions.RequestException as _:
        errhint = ''
        # bug fix: str(_) instead of _.message - exceptions have no .message attribute
        # on Python 3, so the original code raised AttributeError instead of the hint
        if 'BadStatusLine' in str(_):
            errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
        elif self.protocol == 'https' and 'unknown protocol' in str(_):
            errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
        qquit('CRITICAL', str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    # 400 on this endpoint is observed on fresh installs with no deployments yet
    if req.status_code == 400 and req.reason == 'Bad Request':
        qquit('CRITICAL', '{0}: {1} (possibly new install with no deployments yet?)'\
              .format(req.status_code, req.reason))
    if req.status_code != 200:
        qquit('CRITICAL', '{0}: {1}'.format(req.status_code, req.reason))
    try:
        json_list = json.loads(req.content)
        if log.isEnabledFor(logging.DEBUG):
            print(jsonpp(json_list))
            print('=' * 80)
        if not isList(json_list):
            raise ValueError('returned content is not a list')
        if not json_list:
            qquit('UNKNOWN', 'no deployments found')
        # API returns deployments ordered newest first
        last_deployment = json_list[0]
        userid = last_deployment['UserId']
        description = last_deployment['Description']
        hostname = last_deployment['HostName']
        timestamp = last_deployment['timestamp']
        last_deploy_datetime = datetime.strptime(timestamp, '%b %d, %Y %H:%M:%S %p')
    except (KeyError, ValueError) as _:
        qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
              .format(software=self.software, exception=type(_).__name__, error=_,
                      support_msg=support_msg_api()))
    timedelta = datetime.now() - last_deploy_datetime
    mins = int(int(timedelta.total_seconds()) / 60)
    self.msg = "{software} last deployment was at '{timestamp}', {mins} mins ago".format(
        software=self.software, timestamp=timestamp, mins=mins)
    self.check_thresholds(mins)
    if self.verbose:
        self.msg += " by user '{userid}', host = '{hostname}', description = '{description}'"\
                    .format(userid=userid, hostname=hostname, description=description)
    self.msg += ' | mins_since_last_deployment={mins}{thresholds}'\
                .format(mins=mins, thresholds=self.get_perf_thresholds(boundary='lower'))
def mac_get_arg(args): if not args: return '' if not isList(args): die("non-list '{args}' passed to mac_getent_passwd()".format( args=args)) if len(args) > 1: die('only one arg is supported on Mac at this time') arg = args[0] return arg
def check_table_regions(self): log.info('checking regions for table \'%s\'', self.table) regions = None try: table = self.conn.table(self.table) regions = table.regions() except HBaseIOError as _: #if 'org.apache.hadoop.hbase.TableNotFoundException' in _.message: if 'TableNotFoundException' in _.message: qquit('CRITICAL', 'table \'{0}\' does not exist'.format(self.table)) else: qquit('CRITICAL', _) except (socket.error, socket.timeout, ThriftException) as _: qquit('CRITICAL', _) if log.isEnabledFor(logging.DEBUG): log.debug('%s', jsonpp(regions)) if not regions: qquit('CRITICAL', 'failed to get regions for table \'{0}\''.format(self.table)) if not isList(regions): qquit('UNKNOWN', 'region info returned is not a list! ' + support_msg_api()) num_regions = len(regions) log.info('num regions: %s', num_regions) self.msg = 'HBase table \'{0}\' has {1} region{2}'.format( self.table, num_regions, plural(num_regions)) self.check_thresholds(num_regions) num_unassigned_regions = 0 for region in regions: try: if not region['server_name']: #log.debug('region \'%s\' is not assigned to any server', region['name']) num_unassigned_regions += 1 except KeyError as _: qquit( 'UNKNOWN', 'failed to find server assigned to region. ' + support_msg_api()) log.info('num unassigned regions: %s', num_unassigned_regions) self.msg += ', {0} unassigned region{1}'.format( num_unassigned_regions, plural(num_unassigned_regions)) if num_unassigned_regions > 0: self.warning() self.msg += '!' self.msg += ' |' self.msg += ' num_regions={0}'.format( num_regions) + self.get_perf_thresholds(boundary='lower') self.msg += ' num_unassigned_regions={0};1;0'.format( num_unassigned_regions) log.info('finished, closing connection') self.conn.close()
def parse_json(self, json_data): if not isList(json_data): raise UnknownError('non-list returned by Presto for queries. {0}'.format(support_msg_api())) current_queries = [query for query in json_data if query['state'] not in ('FINISHED', 'FAILED')] num_queries = len(current_queries) self.msg = 'Presto SQL {0} queries'.format(num_queries) self.check_thresholds(num_queries) self.msg += ' on coordinator' if self.verbose: self.msg += ' {0}:{1}'.format(self.host, self.port) self.msg += ' | num_queries={0}{1}'.format(num_queries, self.get_perf_thresholds())
def parse_json(self, json_data): if self.get_opt('list_vhosts'): print('RabbitMQ vhosts:\n') print('\n'.join([_['name'] for _ in json_data])) sys.exit(ERRORS['UNKNOWN']) # when returning all vhosts, otherwise will return lone dict item or 404 if not isList(json_data): raise UnknownError("non-list returned by RabbitMQ (got type '{0}'). {1}"\ .format(type(json_data), support_msg_api())) self.msg = "{0} vhost '{1}' ".format(self.name, self.vhost) self.check_vhost(json_data)
def list_databases(self, json_data): if self.get_opt('list'): if not isList(json_data): raise UnknownError('non-list returned by CouchDB for databases') databases = json_data print('CouchDB databases:\n') if databases: for database in databases: print('{0}'.format(database)) else: print('<none>') sys.exit(ERRORS['UNKNOWN'])
def check_workflow(self, workflow_name, workflow_id): log.info("checking workflow '%s' id '%s'", workflow_name, workflow_id) # GET /workflow/fetchWorkflowStatus/<instance_id> is also available but only uses wfId, doesn't support wfName # returns ['result']['list'] = [ {}, {}, ... ] (req, self.query_time) = self.req( url='{url_base}/workflow/publish/getWorkflowExecutionHistory'. format(url_base=self.url_base), # orders by newest first, but seems to return last 10 anyway body=json.dumps({ 'chunk_size': 1, 'currentPage': 1, 'wfName': workflow_name, 'wfId': workflow_id })) info = '' if workflow_name: info += " name '{0}'".format(workflow_name) if workflow_id: info += " id '{0}'".format(workflow_id) try: json_dict = json.loads(req.content) result = json_dict['result'] not_found_err = '{0}. {1}'.format(info, self.extract_response_message(json_dict)) + \ 'Perhaps you specified the wrong name/id or the workflow hasn\'t run yet? ' + \ 'Use --list to see existing workflows' if result is None: if self._all: return None qquit('CRITICAL', "no results found for workflow{0}".format(not_found_err)) reports = result['jobExecutionReports'] if not isList(reports): raise ValueError('jobExecutionReports is not a list') if not reports: qquit('CRITICAL', "no reports found for workflow{0}".format(not_found_err)) # orders by newest first by default, checking last run only report = self.get_latest_complete_report(reports) status = report['status'] if status == 'SUCCESS': pass elif status == 'INCOMPLETE': self.warning() else: self.critical() self.msg += "workflow '{workflow}' id '{id}' status = '{status}'".format( workflow=report['wfName'], id=report['wfId'], status=status) if not self._all: self.check_times(report['startDate'], report['endDate']) return status except (KeyError, ValueError) as _: qquit('UNKNOWN', 'error parsing workflow execution history: {0}'.format(_))
def run(self): content = self.get() try: json_dict = json.loads(content) if log.isEnabledFor(logging.DEBUG): print(jsonpp(json_dict)) print('=' * 80) if not isDict(json_dict): raise ValueError('returned content is not a dict') status = json_dict['status'] if status != 'success': qquit( 'CRITICAL', "request status = '{0}' (expected 'success')".format( status)) status_code = json_dict['statusCode'] if status_code != 200: qquit( 'CRITICAL', "request status code = '{0}' (expected '200')".format( status_code)) message = json_dict['message'] data = json_dict['data'] if not data: num_endpoints = 0 elif not isList(data): qquit('CRITICAL', 'non-list returned for policy end points data') else: num_endpoints = len(data) match = re.match( message, r'Total [(\d+)] policy engine end point\(s\) found', re.I) if not match: raise ValueError( 'failed to parse message for confirmation of number of endpoints' ) message_num_endpoints = int(match.group(1)) if num_endpoints != message_num_endpoints: raise ValueError( 'num endpoints does not match parsed value from returned message' ) except (KeyError, ValueError) as _: qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\ .format(software=self.software, exception=type(_).__name__, error=_, support_msg=support_msg_api())) self.msg = "{software} number of policy end points = {num_endpoints}"\ .format(software=self.software, num_endpoints=num_endpoints) self.check_thresholds(num_endpoints) self.msg += ' | num_endpoints={num_endpoints}'.format( num_endpoints=num_endpoints) + self.get_perf_thresholds()
def parse_json(self, json_data): if not isList(json_data): raise UnknownError( 'non-list returned by Presto for queries. {0}'.format( support_msg_api())) matching_queries = [] for query_item in json_data: query = query_item['query'] log.info('query: %s', query) if self.exclude and self.exclude.search(query): log.info("excluding query '%s'", query) continue if self.include: if not self.include.search(query): continue log.info("including query: %s", query) matching_queries.append(query_item) num_matching_queries = len(matching_queries) # limit searching to last --num queries if num_matching_queries < self.num: log.info( 'number of matching queries %d is less than query limit of %d', num_matching_queries, self.num) self.num = num_matching_queries last_n_matching_queries = matching_queries[0:self.num] if self.list: self.list_queries(last_n_matching_queries) selected_queries = [query_item for query_item in last_n_matching_queries \ if query_item['state'] in self.state_selector] if log.isEnabledFor(logging.INFO): for query_item in matching_queries: log.info('%s query found: %s', self.state_selector, query_item['query']) num_selected_queries = len(selected_queries) self.msg = 'Presto SQL - {0} {1} queries'.format( num_selected_queries, self.state_selector[0].lower()) self.check_thresholds(num_selected_queries) self.msg += ' out of last {0}'.format(num_matching_queries) if self.include or self.exclude: self.msg += ' matching' self.msg += ' queries' if num_matching_queries < self.min_queries: self.warning() self.msg += ' (< {0})'.format(self.min_queries) self.msg += ' on coordinator' if self.verbose: self.msg += ' {0}:{1}'.format(self.host, self.port) self.msg += ' | num_{0}_queries={1}{2} num_matching_queries={3}:{4}'\ .format(self.state_selector[0].lower(), num_selected_queries, self.get_perf_thresholds(), num_matching_queries, self.min_queries)
def parse_metrics(self, json_struct): if not isList(json_struct): raise ValueError("non-list returned by Attivio AIE Perfmon metrics API (got type '{0}')"\ .format(type(json_struct))) metrics = {} if not json_struct: qquit('UNKNOWN', "no matching metrics found for '{0}'".format(self.metrics) + \ ", use --list-metrics to check you've specified a correct metric") for item in json_struct: if not isDict(item): raise ValueError("non-dict item found in list returned by Attivio AIE Perfmon API (got type '{0}')"\ .format(type(item))) if not isList(item['values']): raise ValueError("non-list returned for metric value by Attivio AIE Perfmon API (got type '{0}')"\ .format(type(item['values']))) metric = item['metric'] log.info('metric = %s', metric) if self.skip_metric(item): log.info('skipping metric %s due to filters', metric) continue for key in ('nodeset', 'hostname', 'workflowType', 'workflow', 'component', 'path', 'networkInterface'): if key in item: val = item[key] log.info('%s = %s', key, val) # Attivio returns network interfaces in form "lo - 127.0.0.1" if key == 'networkInterface': val = val.split()[0] metric += '.{0}'.format(val) value = item['values'][0] log.info('value = %s\n', value) if self.precision and isFloat(value): # leaving as string will result in lots of trailing zeros value = float('{value:.{precision}f}'.format( value=value, precision=self.precision)) if metric in metrics: qquit('UNKNOWN', "duplicate metric '{metric}' discovered! {support_msg}"\ .format(metric=metric, support_msg=support_msg_api())) metrics[metric] = value return metrics
def check_read(self, table_conn, row, column, expected=None): log.info("getting cell for row '%s' column '%s'", row, column) cells = [] query_time = None start = time.time() cells = table_conn.cells(row, column, versions=1) query_time = (time.time() - start) * 1000 log.info('query read in %s ms', query_time) cell_info = "HBase table '{0}' row '{1}' column '{2}'".format( self.table, row, column) log.debug('cells returned: %s', cells) if not isList(cells): qquit('UNKNOWN', 'non-list returned for cells. ' + support_msg_api()) if len(cells) < 1: qquit( 'CRITICAL', "no cell value found in {0}, does row / column family combination exist?" .format(cell_info)) elif len(cells) > 1: qquit('UNKNOWN', "more than one cell returned! " + support_msg_api()) value = cells[0] log.info('value = %s', value) if self.regex: log.info( "checking cell's value '{0}' against expected regex '{1}'". format(value, self.regex)) if not re.search(self.regex, value): qquit( 'CRITICAL', "cell value '{0}' (expected regex '{1}') for {2}".format( value, self.regex, cell_info)) if expected: log.info( "checking cell's value is exactly expected value '{0}'".format( expected)) if value != expected: qquit( 'CRITICAL', "cell value '{0}' (expected '{1}') for {2}".format( value, expected, cell_info)) self.timings[column] = self.timings.get(column, {}) self.timings[column]['read'] = max(self.timings[column].get('read', 0), query_time) self.value = value return (value, query_time)
def gen_payload(self, services=None): log.debug('generating payload for services: %s', services) if services is None or services == 'all': services = self.get_services() if not isList(services): code_error('non-list passed to gen_payload') # determined from here: # https://community.hortonworks.com/questions/11111/is-there-a-way-to-execute-ambari-service-checks-in.html payload = [ { "RequestSchedule": { "batch": [ { "requests": [] }, { "batch_settings": { "batch_separation_in_seconds": 1, "task_failure_tolerance": 1 } } ] } } ] service_count = len(services) for index in range(service_count): service = services[index] index += 1 payload[0]['RequestSchedule']['batch'][0]['requests'].append( { "order_id": index, "type": "POST", "uri": "/api/v1/clusters/{0}/requests".format(self.cluster), "RequestBodyInfo":{ "RequestInfo": { "command": "{service}_SERVICE_CHECK".format(service=service.upper()), "context": "{service} Service Check (batch {index} of {total})". format(service=service, index=index, total=service_count) }, "Requests/resource_filters":[ { "service_name": service.upper() } ] } } ) payload_str = json.dumps(payload) if log.isEnabledFor(logging.DEBUG): log.debug('generated payload:\n%s', jsonpp(payload_str)) return payload_str
def list_databases(self, json_data): if self.get_opt('list'): if not isList(json_data): raise UnknownError( 'non-list returned by CouchDB for databases') databases = json_data print('CouchDB databases:\n') if databases: for database in databases: print('{0}'.format(database)) else: print('<none>') sys.exit(ERRORS['UNKNOWN'])
def parse_json(self, json_data): # when returning all queues, otherwise will return lone dict item or 404 if self.get_opt('list_queues'): if not isList(json_data): raise UnknownError("non-list returned by RabbitMQ (got type '{0}'). {1}"\ .format(type(json_data), support_msg_api())) print("RabbitMQ queues on vhost '{0}':\n".format(self.vhost)) print('\n'.join([_['name'] for _ in json_data])) sys.exit(ERRORS['UNKNOWN']) self.msg = "RabbitMQ queue '{0}' ".format(self.queue) if self.verbose: self.msg += "on vhost '{0}' ".format(self.vhost) self.check_queue(json_data)
def parse_metrics(self, json_struct): if not isList(json_struct): raise ValueError("non-list returned by Attivio AIE Perfmon metrics API (got type '{0}')"\ .format(type(json_struct))) metrics = {} if not json_struct: qquit('UNKNOWN', "no matching metrics found for '{0}'".format(self.metrics) + \ ", use --list-metrics to check you've specified a correct metric") for item in json_struct: if not isDict(item): raise ValueError("non-dict item found in list returned by Attivio AIE Perfmon API (got type '{0}')"\ .format(type(item))) if not isList(item['values']): raise ValueError("non-list returned for metric value by Attivio AIE Perfmon API (got type '{0}')"\ .format(type(item['values']))) metric = item['metric'] log.info('metric = %s', metric) if self.skip_metric(item): log.info('skipping metric %s due to filters', metric) continue for key in ('nodeset', 'hostname', 'workflowType', 'workflow', 'component', 'path', 'networkInterface'): if key in item: val = item[key] log.info('%s = %s', key, val) # Attivio returns network interfaces in form "lo - 127.0.0.1" if key == 'networkInterface': val = val.split()[0] metric += '.{0}'.format(val) value = item['values'][0] log.info('value = %s\n', value) if self.precision and isFloat(value): # leaving as string will result in lots of trailing zeros value = float('{value:.{precision}f}'.format(value=value, precision=self.precision)) if metric in metrics: qquit('UNKNOWN', "duplicate metric '{metric}' discovered! {support_msg}"\ .format(metric=metric, support_msg=support_msg_api())) metrics[metric] = value return metrics
def parse_consul_json(self, name, content): json_data = None try: json_data = json.loads(content) except ValueError: raise UnknownError("non-json {} data returned by consul at {}:{}: '{}'. {}"\ .format(name, self.host, self.port, content, support_msg_api())) if not json_data: raise UnknownError("blank {} contents returned by consul at {}:{}! '{}'. {}"\ .format(name, self.host, self.port, content, support_msg_api())) if not isList(json_data): raise UnknownError('non-list {} returned by consul at {}:{} for session data. {}'\ .format(name, self.host, self.port, support_msg_api())) return json_data
def get_peers(content): json_data = None try: json_data = json.loads(content) except ValueError: raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api())) if not isList(json_data): raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api())) if not json_data: raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api())) for peer in json_data: log.debug('peer: {0}'.format(peer)) peers = uniq_list(json_data) return peers
def get_workflows(self): log.info('listing workflows') (req, _) = self.req(url='{url_base}/workflow/getWorkFlows'.format(url_base=self.url_base), # if you have more than 1M workflows in Zaloni you're probably bankrupt or # have migrated to an open source tool already ;) body=json.dumps({'chunk_size': 1000000, 'currentPage': 1, 'sortBy': 'wfName'})) try: json_dict = json.loads(req.content) workflows = json_dict['result']['workFlowDetails'] if not isList(workflows): qquit('UNKNOWN', 'non-list returned for workFlowDetails.' + support_msg_api()) return workflows except ValueError as _: qquit('UNKNOWN', 'failed to parse response from Zaloni Bedrock when requesting workflow list: {0}'\ .format(_))
def parse_json(self, json_data): dynamic = self.get_key(json_data, 'dynamic') peers = self.get_key(json_data, 'peers') if not isList(peers): raise UnknownError('\'peers\' field is not a list as expected! {0}'.format(support_msg_api())) peer_count = len(peers) if self.regex: regex = re.compile(self.regex, re.I) if not self.find_peer(regex, peers): self.msg += 'no peer found matching \'{0}\', '.format(self.regex) self.critical() self.msg += '{0} peer{1} found'.format(peer_count, plural(peer_count)) self.check_thresholds(peer_count) self.msg += ', dynamic = {0}'.format(dynamic) self.msg += ' | hiveserver2_llap_peers={0}{1}'.format(peer_count, self.get_perf_thresholds(boundary='lower'))
def check_workflow(self, workflow_name, workflow_id): log.info("checking workflow '%s' id '%s'", workflow_name, workflow_id) # GET /workflow/fetchWorkflowStatus/<instance_id> is also available but only uses wfId, doesn't support wfName # returns ['result']['list'] = [ {}, {}, ... ] (req, self.query_time) = self.req(url='{url_base}/workflow/publish/getWorkflowExecutionHistory' .format(url_base=self.url_base), # orders by newest first, but seems to return last 10 anyway body=json.dumps({'chunk_size': 1, 'currentPage': 1, 'wfName': workflow_name, 'wfId': workflow_id})) info = '' if workflow_name: info += " name '{0}'".format(workflow_name) if workflow_id: info += " id '{0}'".format(workflow_id) try: json_dict = json.loads(req.content) result = json_dict['result'] not_found_err = '{0}. {1}'.format(info, self.extract_response_message(json_dict)) + \ 'Perhaps you specified the wrong name/id or the workflow hasn\'t run yet? ' + \ 'Use --list to see existing workflows' if result is None: if self._all: return None qquit('CRITICAL', "no results found for workflow{0}".format(not_found_err)) reports = result['jobExecutionReports'] if not isList(reports): raise ValueError('jobExecutionReports is not a list') if not reports: qquit('CRITICAL', "no reports found for workflow{0}".format(not_found_err)) # orders by newest first by default, checking last run only report = self.get_latest_complete_report(reports) status = report['status'] if status == 'SUCCESS': pass elif status == 'INCOMPLETE': self.warning() else: self.critical() self.msg += "workflow '{workflow}' id '{id}' status = '{status}'".format(workflow=report['wfName'], id=report['wfId'], status=status) if not self._all: self.check_times(report['startDate'], report['endDate']) return status except (KeyError, ValueError) as _: qquit('UNKNOWN', 'error parsing workflow execution history: {0}'.format(_))
def get_latest_complete_report(reports): if not isList(reports): code_error('non-list passed to get_lastest_complete_report()') if not reports: qquit('UNKNOWN', 'no reports passed to get_latest_complete_report()') num_reports = len(reports) index = 0 report = reports[index] while report['status'] == 'INCOMPLETE': index += 1 if index < num_reports: report = reports[index] else: log.warn('only incomplete workflows detected, will have to use latest incomplete workflow') report = reports[0] return report
def parse_json(self, json_data): if not isList(json_data): raise UnknownError('non-list returned by Presto for {type}. {msg}'.format( type=self.query_type, msg=support_msg_api())) current_queries = self.filter(json_data) num_queries = len(current_queries) self.msg = 'Presto SQL - {0} current {1}'.format(num_queries, self.query_type) self.check_thresholds(num_queries) # check_presto_num_tasks.py works against workers too #self.msg += ' on {0}'.format(self.query_on_node) if self.verbose: self.msg += ' {0}:{1}'.format(self.host, self.port) self.msg += ' | num_current_{type}={num}{thresholds}'.format( type=self.query_type, num=num_queries, thresholds=self.get_perf_thresholds())
def get_peers(content): json_data = None try: json_data = json.loads(content) except ValueError: raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api())) if not json_data: raise CriticalError('no peers found, recently started?') #if not json_data: # raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api())) if not isList(json_data): raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api())) for peer in json_data: log.debug('peer: {0}'.format(peer)) peers = uniq_list(json_data) return peers
def parse_json(self, json_data): if not isList(json_data): raise UnknownError('non-list returned by Presto for queries. {0}'.format(support_msg_api())) matching_queries = [] for query_item in json_data: query = query_item['query'] log.info('query: %s', query) if self.exclude and self.exclude.search(query): log.info("excluding query '%s'", query) continue if self.include: if not self.include.search(query): continue log.info("including query: %s", query) matching_queries.append(query_item) num_matching_queries = len(matching_queries) # limit searching to last --num queries if num_matching_queries < self.num: log.info('number of matching queries %d is less than query limit of %d', num_matching_queries, self.num) self.num = num_matching_queries last_n_matching_queries = matching_queries[0:self.num] if self.list: self.list_queries(last_n_matching_queries) selected_queries = [query_item for query_item in last_n_matching_queries \ if query_item['state'] in self.state_selector] if log.isEnabledFor(logging.INFO): for query_item in matching_queries: log.info('%s query found: %s', self.state_selector, query_item['query']) num_selected_queries = len(selected_queries) self.msg = 'Presto SQL - {0} {1} queries'.format(num_selected_queries, self.state_selector[0].lower()) self.check_thresholds(num_selected_queries) self.msg += ' out of last {0}'.format(num_matching_queries) if self.include or self.exclude: self.msg += ' matching' self.msg += ' queries' if num_matching_queries < self.min_queries: self.warning() self.msg += ' (< {0})'.format(self.min_queries) self.msg += ' on coordinator' if self.verbose: self.msg += ' {0}:{1}'.format(self.host, self.port) self.msg += ' | num_{0}_queries={1}{2} num_matching_queries={3}:{4}'\ .format(self.state_selector[0].lower(), num_selected_queries, self.get_perf_thresholds(), num_matching_queries, self.min_queries)
def check_missing_traits(self, traits): if not isList(traits): raise UnknownError('traits non-list returned. {0}'.format(support_msg_api())) if self.traits: missing_traits = [] #traits = [t.lower() for t in traits] for trait in self.traits: #if trait.lower() not in traits: if trait not in traits: missing_traits.append(trait) if missing_traits: self.critical() self.msg += " (expected trait{plural} '{missing_traits}' not found in entity)".format( missing_traits=','.join(missing_traits), plural=plural(self.traits)) return missing_traits return []
def check_missing_tags(self, tags): if not isList(tags): raise UnknownError('tags non-list returned. {0}'.format(support_msg_api())) if self.tags: missing_tags = [] #tags = [t.lower() for t in tags] for tag in self.tags: #if tag.lower() not in tags: if tag not in tags: missing_tags.append(tag) if missing_tags: self.critical() self.msg += " (expected tag{plural} '{missing_tags}' not found in entity)".format( missing_tags=','.join(missing_tags), plural=plural(self.tags)) return missing_tags return []