def run(self):
    """Check Jenkins for available plugin updates via the python-jenkins API.

    Connects to Jenkins, counts installed plugins and how many report
    hasUpdate=True, raises WARNING if any updates are pending and appends
    perfdata (updates_available with a warning threshold of 1, installed
    plugin count and query time) to self.msg.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        # timeout / 3 leaves headroom for retries within the plugin's overall timeout
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        log.debug('getting plugin info')
        #plugins = server.get_plugins()
        # deprecated but .get_plugins() output is not JSON serializable
        # so must use old deprecated method get_plugins_info() :-/
        plugins = server.get_plugins_info()
        query_time = time.time() - start_time
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(plugins))
    plugin_count = len(plugins)
    update_count = 0
    for plugin in plugins:
        if plugin['hasUpdate']:
            update_count += 1
    self.msg += " {0} plugin update{1} available out of {2} installed plugin{3}".format(
        update_count, plural(update_count), plugin_count, plural(plugin_count))
    if update_count:
        self.warning()
    # ';1' = warning threshold of 1 in Nagios perfdata format
    self.msg += ' | updates_available={0};1 plugins_installed={1} query_time={2:.4f}s'.format(
        update_count, plugin_count, query_time)
def run(self):
    """Connect to Jenkins and check the number of currently running builds.

    Appends the running build count to self.msg, compares it against
    thresholds and adds perfdata (count, thresholds, query time).
    Raises CriticalError on any Jenkins API failure.
    """
    url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', url)
        start_time = time.time()
        conn = jenkins.Jenkins(url, username=self.user, password=self.password, timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            whoami = conn.get_whoami()
            log.debug('connected as user %s', jsonpp(whoami))
        log.debug('fetching running builds')
        builds = conn.get_running_builds()
        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s', jsonpp(builds))
        build_count = len(builds)
        log.debug('running build count: %s', build_count)
        self.msg += '{0}'.format(build_count)
        self.check_thresholds(build_count)
    except jenkins.JenkinsException as exc:
        raise CriticalError(exc)
    query_time = time.time() - start_time
    # perfdata: count, thresholds, then timing
    self.msg += ' | running_build_count={0:d}'.format(build_count) \
                + self.get_perf_thresholds() \
                + ' query_time={0:.4f}s'.format(query_time)
def parse(content):
    """Parse HBase RegionServer JMX JSON and return the compaction queue length.

    Scans the 'beans' list for the RegionServer mbean and returns its
    compactionQueueLength. Calls qquit('UNKNOWN', ...) if the JSON cannot
    be parsed, the value is not an integer, or the mbean is not found.
    """
    try:
        _ = json.loads(content)
        if log.isEnabledFor(logging.DEBUG):
            log.debug(jsonpp(_))
        compaction_queue_size = None
        for bean in _['beans']:
            if bean['name'] == 'Hadoop:service=HBase,name=RegionServer,sub=Server':
                if log.isEnabledFor(logging.DEBUG):
                    log.debug('found RegionServer section:')
                    log.debug(jsonpp(bean))
                compaction_queue_size = bean['compactionQueueLength']
                if not isInt(compaction_queue_size):
                    qquit('UNKNOWN', 'non-integer returned for compactionQueueLength! ' + support_msg_api())
                return compaction_queue_size
    except ValueError as _:
        # bug fix: concatenating the exception object itself to a str raises TypeError,
        # masking the real parse error - must stringify the exception first
        qquit('UNKNOWN', str(_) + ': failed to parse HBase Master jmx info. ' + support_msg_api())
    qquit('UNKNOWN', 'RegionServer mbean not found, double check this is pointing to an HBase RegionServer')
def run(self):
    """Check the Jenkins job count, optionally scoped to a single view.

    With --list-views, prints all view names and exits UNKNOWN.
    With --view, asserts the view exists and counts only its jobs;
    otherwise counts all jobs server-wide. Appends the count, threshold
    comparison and query time perfdata to self.msg.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        # timeout / 3 leaves headroom within the plugin's overall timeout
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        if self.list_views:
            log.debug('getting views')
            views = server.get_views()
            if log.isEnabledFor(logging.DEBUG):
                log.debug("%s", jsonpp(views))
            print('Jenkins views:\n')
            for view in views:
                print(view['name'])
            sys.exit(ERRORS['UNKNOWN'])
        if self.view:
            log.debug('checking view exists')
            #assert server.view_exists(self.view)
            server.assert_view_exists(self.view)
            log.debug('getting jobs for view %s', self.view)
            view_jobs = server.get_jobs(view_name=self.view)
            if log.isEnabledFor(logging.DEBUG):
                log.debug("%s", jsonpp(view_jobs))
            job_count = len(view_jobs)
        else:
            log.debug('getting job count')
            job_count = server.jobs_count()
            # more efficient with many folders
            # job_count = server.run_script(
            #     "print(Hudson.instance.getAllItems("
            #     "    hudson.model.AbstractProject).count{"
            #     "    !(it instanceof hudson.matrix.MatrixConfiguration)"
            #     "    })")
        query_time = time.time() - start_time
        log.debug('job count: %s', job_count)
        if self.view:
            self.msg += "for view '{0}' ".format(self.view)
        self.msg += '= {0}'.format(job_count)
        self.check_thresholds(job_count)
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    self.msg += ' | job_count={0:d}'.format(job_count)
    self.msg += self.get_perf_thresholds()
    self.msg += ' query_time={0:.4f}s'.format(query_time)
def req(self, url_suffix, data=None, request_type='GET'):
    """Make an authenticated REST request and return the response body text.

    Args:
        url_suffix:   path appended to self.url_base
        data:         optional request body; its presence selects POST when
                      request_type is not 'PUT'
        request_type: 'PUT' forces a PUT; otherwise POST if data is given,
                      else GET

    Sends an X-Requested-By header (substituting the real OS user when
    running as 'admin' - presumably for REST APIs that require the header;
    verify against the target API). Dies on connection errors. On non-200
    responses tries to surface the JSON 'message' field from the body,
    then falls through to raise_for_status().
    """
    x_requested_by = self.user
    url = self.url_base + '/' + url_suffix.lstrip('/')
    if self.user == 'admin':
        x_requested_by = os.getenv('USER', self.user)
    headers = {'X-Requested-By': x_requested_by}
    log.debug('X-Requested-By: %s', x_requested_by)
    try:
        if request_type == 'PUT':
            log.debug('PUT %s', url)
            log.debug('PUTing data:\n\n%s' % data)
            result = requests.put(url, auth=(self.user, self.password), headers=headers, data=data)
        elif data:
            log.debug('POST %s', url)
            log.debug('POSTing data:\n\n%s' % data)
            result = requests.post(url, auth=(self.user, self.password), headers=headers, data=data)
        else:
            log.debug('GET %s', url)
            result = requests.get(url, auth=(self.user, self.password), headers=headers)
    except requests.exceptions.RequestException as _:
        die(_)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('headers:\n%s' % '\n'.join(['%(key)s:%(value)s' % locals()
                                              for (key, value) in result.headers.items()]))  # pylint: disable=unused-variable
    log.debug('status code: %s' % result.status_code)
    log.debug('body:\n%s' % result.text)
    if result.status_code != 200:
        try:
            message = result.json()['message']
            if message and message != result.reason:
                if log.isEnabledFor(logging.DEBUG):
                    # in debug mode raise for a full traceback instead of a clean die()
                    raise requests.exceptions.RequestException('%s %s: %s' \
                          % (result.status_code, result.reason, message))
                else:
                    die('{0} {1}: {2}'.format(result.status_code, result.reason, message))
        # raised by ['message'] field not existing
        except KeyError:
            pass
        # raised by .json() No JSON object could be decoded
        except ValueError:
            pass
    result.raise_for_status()
    return result.text
def parse_is_table_compacting(content):
    """Parse an HBase table JSP page and return whether the table is compacting.

    Locates the 'Table Attributes' section, finds the 'Compaction' row and
    returns False when its state is 'NONE' (enabled, idle) or 'Unknown'
    (disabled), True otherwise. Calls qquit('UNKNOWN', ...) on any parse
    failure or if the section cannot be found.
    """
    soup = BeautifulSoup(content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '=' * 80))
    try:
        headings = soup.findAll('h2')
        for heading in headings:
            log.debug("checking heading '%s'", heading)
            if heading.get_text() == 'Table Attributes':
                log.debug('found Table Attributes section header')
                table = heading.find_next('table')
                log.debug('checking first following table')
                if log.isEnabledFor(logging.DEBUG):
                    log.debug('table:\n%s\n%s', table.prettify(), '=' * 80)
                rows = table.findChildren('tr')
                if len(rows) < 3:
                    qquit('UNKNOWN', 'parse error - less than the 3 expected rows in table attributes')
                col_names = rows[0].findChildren('th')
                if len(col_names) < 3:
                    qquit('UNKNOWN', 'parse error - less than the 3 expected column headings')
                first_col = col_names[0].get_text().strip()
                if first_col != 'Attribute Name':
                    # bug fix: the original format string had an empty '' second placeholder,
                    # so the heading actually found never made it into the error message
                    qquit('UNKNOWN',
                          'parse error - expected first column header to be \'{0}\' but got \'{1}\' instead. '\
                          .format('Attribute Name', first_col) + support_msg())
                for row in rows[1:]:
                    cols = row.findChildren('td')
                    if len(cols) < 3:
                        qquit('UNKNOWN', 'parse error - less than the 3 expected columns in table attributes. '
                              + support_msg())
                    if cols[0].get_text().strip() == 'Compaction':
                        compaction_state = cols[1].get_text().strip()
                        # NONE when enabled, Unknown when disabled
                        if compaction_state in ('NONE', 'Unknown'):
                            return False
                        else:
                            return True
        qquit('UNKNOWN', 'parse error - failed to find Table Attributes section in JSP. ' + support_msg())
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse output. ' + support_msg())
def run(self):
    """Check a Jenkins node's online status and executor count.

    With --list-nodes, prints all node names and exits UNKNOWN.
    Otherwise fetches the named node (mapping 'master' to Jenkins'
    internal '(master)' name), goes CRITICAL with the offline cause if
    the node is offline, and checks the executor count against lower
    thresholds with perfdata.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        if self.list_nodes:
            log.debug('getting Jenkins nodes')
            nodes = server.get_nodes()
            log.debug('nodes: %s', nodes)
            print('Jenkins nodes:\n')
            for _ in nodes:
                print(_['name'])
            sys.exit(ERRORS['UNKNOWN'])
        # doesn't find 'master' node despite showing it in the list of nodes,
        # jenkins puts brackets around master
        if self.node == 'master':
            self.node = '(master)'
        node = server.get_node_info(self.node)
    except jenkins.NotFoundException:
        raise CriticalError("node '{0}' not found, did you specify the correct name? See --list to see nodes"\
                            .format(self.node))
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    query_time = time.time() - start_time
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(node))
    offline = node['offline']
    offline_reason = node['offlineCauseReason']
    num_executors = node['numExecutors']
    # bug fix: validate before converting - the original called int() first, which
    # raises ValueError on bad data and made the isInt() sanity check unreachable
    if not isInt(num_executors):
        raise UnknownError('numExecutors returned non-integer! {0}'.format(support_msg_api()))
    num_executors = int(num_executors)
    if offline:
        self.critical()
        self.msg += 'offline: {0}'.format(offline_reason)
    else:
        self.msg += 'online'
    self.msg += ', num executors = {0}'.format(num_executors)
    self.check_thresholds(num_executors)
    self.msg += ' | num_executors={0:d}'.format(num_executors)
    self.msg += self.get_perf_thresholds(boundary='lower')
    self.msg += ' query_time={0:.4f}s'.format(query_time)
def main(self):
    """Main entry point: parse options, arm the timeout alarm and run the check.

    Option parsing errors route to usage(); CriticalError / WarningError /
    UnknownError raised by run() map to the matching Nagios exit status via
    qquit(); control-C prints a short message instead of a traceback.
    """
    # DEBUG env var is picked up immediately in pylib utils, do not override it here if so
    if os.getenv('DEBUG'):
        log.setLevel(logging.DEBUG)
    if not log.isEnabledFor(logging.DEBUG) and \
       not log.isEnabledFor(logging.ERROR):  # do not downgrade logging either
        log.setLevel(logging.WARN)
    self.setup()
    try:
        self.add_options()
        self.add_default_opts()
    except InvalidOptionException as _:
        self.usage(_)
    try:
        self.__parse_args__()
        # broken
        # autoflush()
        # too late
        # os.environ['PYTHONUNBUFFERED'] = "anything"
        log.info('Hari Sekhon %s', self.version)
        log.info(self._github_repo)
        log.info('verbose level: %s (%s)', self.verbose, logging.getLevelName(log.getEffectiveLevel()))
        if self.timeout is not None:
            validate_int(self.timeout, 'timeout', 0, self.timeout_max)
            log.debug('setting timeout alarm (%s)', self.timeout)
            # SIGALRM enforces the overall plugin timeout so the check can't hang
            signal.signal(signal.SIGALRM, self.timeout_handler)
            signal.alarm(int(self.timeout))
        # if self.options.version:
        #     print(self.version)
        #     sys.exit(ERRORS['UNKNOWN'])
        self.process_options()
        self.process_args()
        try:
            self.run()
        except CriticalError as _:
            qquit('CRITICAL', _)
        except WarningError as _:
            qquit('WARNING', _)
        except UnknownError as _:
            qquit('UNKNOWN', _)
        self.__end__()
    except InvalidOptionException as _:
        if log.isEnabledFor(logging.DEBUG):
            log.debug(traceback.format_exc())
        self.usage(_)  # pragma: no cover
    except KeyboardInterrupt:
        # log.debug('Caught control-c...')
        print('Caught control-c...')  # pragma: no cover
def search(term, limit=25):
    """Search the DockerHub v1 API for repositories matching term.

    Args:
        term:  search string (url-encoded before sending)
        limit: maximum number of results to request (default 25)

    Returns the decoded JSON response; calls die() on connection errors,
    non-200 responses or unparseable output.
    """
    url = 'https://index.docker.io/v1/search?q={0}&n={1}'.format(urllib.quote_plus(term), limit)
    log.debug('GET %s' % url)
    try:
        verify = True
        # workaround for Travis CI and older pythons - we're not exchanging secret data so this is ok
        #if os.getenv('TRAVIS'):
        #    verify = False
        req = requests.get(url, verify=verify)
    except requests.exceptions.RequestException as _:
        die(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
    if req.status_code != 200:
        die("%s %s" % (req.status_code, req.reason))
    if not isJson(req.content):
        die('invalid non-JSON response from DockerHub!')
    if log.isEnabledFor(logging.DEBUG):
        print(jsonpp(req.content))
        print('='*80)
    try:
        data = json.loads(req.content)
    # bug fix: json.loads() raises ValueError on bad input, not KeyError, so the
    # original handler could never fire - catch both to also cover format changes
    except (KeyError, ValueError) as _:
        die('failed to parse output from DockerHub (format may have changed?): {0}'.format(_))
    return data
def run(self): log.info('testing AWS API call') # there isn't really a .ping() type API endpoint so just connect to IAM and list users ec2 = boto3.client('ec2') #instances = ec2.describe_instances() describe_instances = ec2.get_paginator('describe_instances') statuses = OrderedDict([ ('running', 0), ('terminated', 0), ('stopped', 0), ('stopping', 0), ('shutting-down', 0), ]) flatten = lambda _: [item for sublist in _ for item in sublist] # this might time out if there are a lot of EC2 instances for instances_response in describe_instances.paginate(): if log.isEnabledFor(logging.DEBUG): log.debug('\n\n%s', jsonpp(instances_response)) instances = flatten( [_['Instances'] for _ in instances_response['Reservations']]) for instance in instances: self.instance_count += 1 #if log.isEnabledFor(logging.DEBUG): # log.debug('\n\n%s', instance) statuses[instance['State']['Name']] = statuses.get( instance['State']['Name'], 0) + 1 self.msg = 'AWS EC2 instance total = {}'.format(self.instance_count) self.check_statuses(statuses)
def get_version(self):
    """Return the Solr version string from /solr/admin/info/system.

    Solr 7.0+ answers with JSON ('lucene' / 'solr-spec-version'); older
    versions return XML which is parsed with BeautifulSoup instead.
    Quits CRITICAL on request/HTTP errors, UNKNOWN if parsing fails.
    """
    url = 'http://{host}:{port}/solr/admin/info/system'.format(host=self.host, port=self.port)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug('response: %s %s', req.status_code, req.reason)
    log.debug('content:\n%s\n%s\n%s', '='*80, req.content.strip(), '='*80)
    if req.status_code != 200:
        qquit('CRITICAL', '%s %s' % (req.status_code, req.reason))
    # versions 7.0+
    if isJson(req.content):
        json_data = json.loads(req.content)
        version = json_data['lucene']['solr-spec-version']
    else:
        soup = BeautifulSoup(req.content, 'html.parser')
        if log.isEnabledFor(logging.DEBUG):
            log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
        try:
            version = soup.find('str', {'name':'solr-spec-version'}).text
        except (AttributeError, TypeError) as _:
            qquit('UNKNOWN', 'failed to find parse Solr output. {0}\n{1}'\
                  .format(support_msg_api(), traceback.format_exc()))
    return version
def gen_payload(self, services=None):
    """Generate the Ambari request-schedule JSON payload to run service checks.

    Args:
        services: list of service names, or None / 'all' to check every
                  service returned by self.get_services()

    Returns the payload as a JSON string containing one batched POST
    request per service, numbered in the order given.
    """
    log.debug('generating payload for services: %s', services)
    if services is None or services == 'all':
        services = self.get_services()
    if not isList(services):
        code_error('non-list passed to gen_payload')
    # determined from here:
    # https://community.hortonworks.com/questions/11111/is-there-a-way-to-execute-ambari-service-checks-in.html
    payload = [
        {
            "RequestSchedule": {
                "batch": [
                    {
                        "requests": []
                    },
                    {
                        "batch_settings": {
                            "batch_separation_in_seconds": 1,
                            "task_failure_tolerance": 1
                        }
                    }
                ]
            }
        }
    ]
    service_count = len(services)
    # idiom fix: enumerate(..., 1) replaces the range(len()) loop + manual index += 1
    for index, service in enumerate(services, 1):
        # ZOOKEEPER service check command name is irregular ZOOKEEPER_QUORUM_SERVICE_CHECK,
        # not ZOOKEEPER_SERVICE_CHECK
        if service.upper() == "ZOOKEEPER":
            command_data = "{service}_QUORUM_SERVICE_CHECK".format(service=service.upper())
        else:
            command_data = "{service}_SERVICE_CHECK".format(service=service.upper())
        payload[0]['RequestSchedule']['batch'][0]['requests'].append(
            {
                "order_id": index,
                "type": "POST",
                "uri": "/api/v1/clusters/{0}/requests".format(self.cluster),
                "RequestBodyInfo": {
                    "RequestInfo": {
                        # redundant "{commandData}".format(...) wrapper removed - same value
                        "command": command_data,
                        "context": "{service} Service Check (batch {index} of {total})".
                                   format(service=service, index=index, total=service_count)
                    },
                    "Requests/resource_filters": [
                        {
                            "service_name": service.upper()
                        }
                    ]
                }
            }
        )
    payload_str = json.dumps(payload)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('generated payload:\n%s', jsonpp(payload_str))
    return payload_str
def main(self):
    """Top-level exception boundary for the plugin.

    Delegates to the base class main() and maps every raised exception to
    the appropriate Nagios exit status via qquit(), so the plugin always
    exits with a valid Nagios state instead of dumping a traceback.
    """
    try:
        # Python 2.x
        super(NagiosPlugin, self).main()
        # Python 3.x
        # super().__init__()
        # redirect_stderr_stdout()
    except CriticalError as _:
        qquit('CRITICAL', _)
    except WarningError as _:
        qquit('WARNING', _)
    except UnknownError as _:
        qquit('UNKNOWN', _)
    except CodingError as _:
        qquit('UNKNOWN', 'Programming Error: {0}. {1}'.format(_, support_msg()))
    except Exception as _:  # pylint: disable=broad-except
        # catch-all: anything unexpected still becomes a well-formed UNKNOWN result
        exception_type = type(_).__name__
        if log.isEnabledFor(logging.DEBUG):
            log.debug("exception: '%s'", exception_type)
            log.debug(traceback.format_exc())
        msg = 'Nagios Plugin Exception: {exception_type}: {msg}'.format(exception_type=exception_type,
                                                                       msg=self.exception_msg())
        #msg = ', '.join([x.strip() for x in msg.split('\n')])
        # ', ' doesn't look nice for ':\n ...' => ':, ...' (snakebite OutOfNNException)
        #msg = '\t'.join([x.strip() for x in msg.split('\n')])
        #if self.options.verbose > 2:
        #    msg = type(_).__name__ + ': ' + msg
        msg += '. ' + support_msg()
        qquit('UNKNOWN', msg)
def get(self, url_suffix, params=None):
    """GET a metrics REST endpoint and return the decoded JSON.

    Args:
        url_suffix: path appended under /rest/metrics/
        params:     optional dict of query string parameters

    Returns the parsed JSON structure. Quits CRITICAL on connection
    errors (with a hint when the failure looks like an SSL/plaintext
    port mismatch) or non-200 responses.
    """
    log.info('querying %s', self.software)
    url = '{protocol}://{host}:{port}/rest/metrics/{url_suffix}'\
          .format(host=self.host, port=self.port, protocol=self.protocol, url_suffix=url_suffix)
    log.debug('GET %s', url)
    try:
        req = requests.get(url, params=params)
        #req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
    except requests.exceptions.RequestException as _:
        errhint = ''
        # bug fix: Python 3 exceptions have no .message attribute - accessing it
        # raised AttributeError here and masked the real connection error;
        # str(_) works on both Python 2 and 3
        if 'BadStatusLine' in str(_):
            errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
        elif self.protocol == 'https' and 'unknown protocol' in str(_):
            errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
        qquit('CRITICAL', str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        qquit('CRITICAL', '{0}: {1}'.format(req.status_code, req.reason))
    json_struct = json.loads(req.content)
    if log.isEnabledFor(logging.DEBUG):
        print(jsonpp(req.content))
        print('=' * 80)
    return json_struct
def parse(self, stdout):
    """Parse `docker images` output expecting exactly one image line.

    Raises CriticalError if the image is absent, UnknownError if multiple
    tags matched (listing the tags found) or the header format is not
    recognised; otherwise delegates to self.check_id() / self.check_size().
    """
    output = [_ for _ in stdout.split('\n') if _]
    if len(output) < 2:
        raise CriticalError("docker image '{repo}' not found! Does not exist or has not been pulled yet?"\
                            .format(repo=self.docker_image))
    name_len = len(self.docker_image)
    if len(output) > 2:
        # slice the ~10 chars following the image name to grab the TAG column
        # NOTE(review): assumes fixed-width `docker images` column alignment - confirm
        tags = set([line[name_len:name_len + 10].strip() for line in output[1:]])
        tags = [tag for tag in tags if tag != '<none>']
        tags = sorted(list(tags))
        if log.isEnabledFor(logging.DEBUG):
            for tag in tags:
                log.debug('found tag: %s', tag)
        raise UnknownError('too many results returned - did you forget to suffix a specific :tag to ' + \
                           '--docker-image? (eg. :latest, :1.1). The following tags were found: ' + \
                           ', '.join(tags))
    header_line = output[0]
    docker_image_line = output[1]
    # 'IMAGE ID' spans the 3rd and 4th whitespace-separated header tokens
    image_header = ' '.join(header_line.split()[2:4])
    log.debug('image header column: %s', image_header)
    if image_header != 'IMAGE ID':
        raise UnknownError("3rd column in header '{0}' is not 'IMAGE ID' as expected, parsing failed!"\
                           .format(image_header))
    self.msg = "docker image '{repo}'".format(repo=self.docker_image)
    self.check_id(docker_image_line)
    self.check_size(docker_image_line)
def check(self, client):
    """Check Docker container counts from `docker info`.

    One of self.running / self.paused / self.stopped / self.total selects
    which count is compared against thresholds (first truthy flag wins);
    with none set, all four counts are reported informationally. Builds
    self.msg with perfdata appended.
    """
    log.info('running Docker info')
    info = client.info()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(info))
    containers = info['Containers']
    running_containers = info['ContainersRunning']
    paused_containers = info['ContainersPaused']
    stopped_containers = info['ContainersStopped']
    self.msg = 'Docker '
    if self.running:
        self.msg += 'running containers = {}'.format(running_containers)
        self.check_thresholds(running_containers)
        self.msg += ' | running_containers={}{}'.format(running_containers, self.get_perf_thresholds())
    elif self.paused:
        self.msg += 'paused containers = {}'.format(paused_containers)
        self.check_thresholds(paused_containers)
        self.msg += ' | paused_containers={}{}'.format(paused_containers, self.get_perf_thresholds())
    elif self.stopped:
        self.msg += 'stopped containers = {}'.format(stopped_containers)
        self.check_thresholds(stopped_containers)
        self.msg += ' | stopped_containers={}{}'.format(stopped_containers, self.get_perf_thresholds())
    elif self.total:
        self.msg += 'total containers = {}'.format(containers)
        self.check_thresholds(containers)
        self.msg += ' | total_containers={}{}'.format(containers, self.get_perf_thresholds())
    else:
        # no selector: report all four counts without threshold evaluation
        self.msg += 'containers = {}, running containers = {}, paused containers = {}, stopped containers = {}'\
                    .format(containers, running_containers, paused_containers, stopped_containers)
        self.msg += ' | containers={} running_containers={} paused_containers={} stopped_containers={}'\
                    .format(containers, running_containers, paused_containers, stopped_containers)
def run(self):
    """Fetch an HBase UI status page and check the reported version.

    Validates host/port options, GETs the page, parses the version from
    the HTML via self.parse_version() and, when --expected is given, goes
    CRITICAL if the version does not match the regex.
    """
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    validate_host(host)
    validate_port(port)
    expected = self.get_opt('expected')
    if expected is not None:
        validate_regex(expected)
        log.info('expected version regex: %s', expected)
    url = 'http://%(host)s:%(port)s/' % locals() + self.url_path
    log.debug('GET %s' % url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        raise CriticalError(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        raise CriticalError("%s %s" % (req.status_code, req.reason))
    soup = BeautifulSoup(req.content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '=' * 80))
    # default to OK, downgraded to CRITICAL below on regex mismatch
    self.ok()
    version = self.parse_version(soup)
    self.msg = 'HBase {0} version = {1}'.format(self.role, version)
    if expected is not None and not re.search(expected, version):
        self.msg += " (expected '{0}')".format(expected)
        self.critical()
def req(self, url, method='post', body=None):
    """Send an HTTP request with JSON headers and session cookie handling.

    Args:
        url:    full URL to request
        method: HTTP verb, matched to a `requests` module function name
                (default 'post')
        body:   optional request body

    Returns a tuple of (requests response object, elapsed seconds).
    Captures a refreshed JSESSIONID cookie from the response for later
    calls. Quits CRITICAL on connection errors or non-200 status,
    surfacing the JSON 'result' field from the body when present.
    """
    assert isStr(method)
    log.debug('%s %s', method.upper(), url)
    headers = {"Content-Type": "application/json",
               "Accept": "application/json",
               "JSESSIONID": self.jsessionid}
    log.debug('headers: %s', headers)
    start_time = time.time()
    try:
        # dispatch to requests.get/post/put/... dynamically by method name
        req = getattr(requests, method.lower())(url,
                                                #cookies=self.jar,
                                                data=body,
                                                headers=headers)
        for cookie_tuple in req.cookies.items():
            if cookie_tuple[0] == 'JSESSIONID':
                self.jsessionid = cookie_tuple[1].rstrip('/')
        timing = time.time() - start_time
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("response: %s %s", req.status_code, req.reason)
        content = req.content
        # pretty-print the body if it is JSON, otherwise log it raw
        try:
            content = jsonpp(req.content).strip()
        except ValueError:
            pass
        log.debug("content:\n%s\n%s\n%s", '='*80, content, '='*80)
    if req.status_code != 200:
        info = ''
        try:
            info = ': {0}'.format(json.loads(req.content)['result'])
        except (KeyError, ValueError):
            pass
        qquit('CRITICAL', "%s %s%s" % (req.status_code, req.reason, info))
    return (req, timing)
def run(self):
    """Page through the DockerHub build history API looking for a completed build.

    Fetches up to self.max_pages pages and hands each page to
    self.process_results(); on success appends query_time perfdata and
    returns True, otherwise raises UnknownError. The error message
    assumes 10 builds per API page.
    """
    start_time = time.time()
    for page in range(1, self.max_pages + 1):
        url = 'https://registry.hub.docker.com/v2/repositories/{repo}/buildhistory?page={page}'\
              .format(repo=self.repo, page=page)
        req = self.request.get(url)
        if log.isEnabledFor(logging.DEBUG):
            log.debug(jsonpp(req.content))
        json_data = json.loads(req.content)
        log.debug('%s out of %s results returned for page %s',
                  len(json_data['results']), json_data['count'], page)
        if self.process_results(json_data):
            # not quite as accurate as before as it now includes processing time but close enough
            query_time = time.time() - start_time
            if '|' not in self.msg:
                self.msg += ' |'
            self.msg += ' query_time={0:.2f}s'.format(query_time)
            return True
    extra_info = ''
    if self.verbose:
        extra_info = ' ({0} page{1} of API output)'\
                     .format(self.max_pages, plural(self.max_pages))
    # max_pages * 10: presumably 10 builds per page - verify against the API
    raise UnknownError('no completed builds found in last {0} builds{1}'.format(
        self.max_pages * 10, extra_info))
def run(self):
    """Flush HBase tables via the `hbase shell`.

    Gets the table list (optionally filtered by a table regex), prints
    and exits for --list-tables, then pipes one `flush 'table'` command
    per table into a single hbase shell subprocess, dying on any failure.
    """
    tables = self.get_tables()
    if not tables:
        die('No Tables Found')
    if self.get_opt('list_tables'):
        print('Tables:\n\n' + '\n'.join(tables))
        sys.exit(3)  # 3 = UNKNOWN exit code
    tables_to_flush = set()
    if self.table_regex:
        log.info('filtering tables based on regex')
        for table in sorted(list(tables)):
            if self.table_regex.search(table):
                tables_to_flush.add(table)
    else:
        tables_to_flush = sorted(list(tables))
    if log.isEnabledFor(logging.INFO):
        log.info('Flushing tables:\n\n%s\n', '\n'.join(tables_to_flush))
    flush_commands = '\n'.join(["flush '{0}'".format(table) for table in tables_to_flush])
    try:
        # by having stdout and stderr go to the same place more likely the output will be in a sane order
        process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = process.communicate(input=flush_commands)
        process.wait()
        if process.returncode != 0:
            print('ERROR:', end='')
            die(stdout)
        print(stdout)
    except OSError as _:
        die("OSError running hbase shell to flush tables: {0}".format(_))
    except subprocess.CalledProcessError as _:
        print('Failed to get tables using HBase shell:\n')
        print(_.output)
        sys.exit(_.returncode)
def query(url):
    """Fetch one page of DockerHub tag results.

    Returns a tuple of (list of tag names, next-page URL or None).
    Dies on HTTP/connection errors, non-JSON responses or an unexpected
    JSON layout.
    """
    log.debug('GET %s' % url)
    try:
        verify = True
        # workaround for Travis CI and older pythons - we're not exchanging secret data so this is ok
        #if os.getenv('TRAVIS'):
        #    verify = False
        req = requests.get(url, verify=verify)
    except requests.exceptions.RequestException as _:
        die(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
    if req.status_code != 200:
        die("%s %s" % (req.status_code, req.reason))
    if not isJson(req.content):
        die('invalid non-JSON response from DockerHub!')
    if log.isEnabledFor(logging.DEBUG):
        print(jsonpp(req.content))
        print('='*80)
    try:
        parsed = json.loads(req.content)
        names = [result['name'] for result in parsed['results']]
        # could perhaps stack overflow in some scenario
        # not as functional programming 'cool' but will do own tail recursion and just while loop instead
        #if 'next' in j and j['next']:
        #    tag_list += self.query(j['next'])
        return (names, parsed['next'])
    except KeyError as _:
        die('failed to parse output from DockerHub (format may have changed?): {0}'.format(_))
def run(self):
    """Check the number of offline Jenkins nodes (agents).

    Connects to Jenkins, counts nodes flagged offline, compares the count
    against thresholds and appends offline count, total node count and
    query time as perfdata.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        # timeout / 3 leaves headroom within the plugin's overall timeout
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        log.debug('getting Jenkins nodes')
        nodes = server.get_nodes()
        log.debug('nodes: %s', nodes)
        node_count = len(nodes)
        log.debug('node count: %s', node_count)
        offline_nodes = 0
        for node in nodes:
            if node['offline']:
                offline_nodes += 1
        self.msg += '{0} offline node{1}'.format(offline_nodes, plural(offline_nodes))
        self.check_thresholds(offline_nodes)
        self.msg += ' out of {0} node{1}'.format(node_count, plural(node_count))
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    query_time = time.time() - start_time
    self.msg += ' | offline_nodes={0:d}'.format(offline_nodes)
    self.msg += self.get_perf_thresholds()
    self.msg += ' node_count={0:d}'.format(node_count)
    self.msg += ' query_time={0:.4f}s'.format(query_time)
def get_version(self):
    """Return the Solr version string from the /solr/admin/info/system endpoint.

    Solr 7.0+ answers with JSON; older versions return XML, which is
    parsed with BeautifulSoup instead. Quits CRITICAL on request errors
    or non-200 responses, UNKNOWN if the version cannot be located.
    """
    url = 'http://{host}:{port}/solr/admin/info/system'.format(host=self.host, port=self.port)
    log.debug('GET %s', url)
    try:
        response = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug('response: %s %s', response.status_code, response.reason)
    log.debug('content:\n%s\n%s\n%s', '=' * 80, response.content.strip(), '=' * 80)
    if response.status_code != 200:
        qquit('CRITICAL', '%s %s' % (response.status_code, response.reason))
    # versions 7.0+
    if isJson(response.content):
        return json.loads(response.content)['lucene']['solr-spec-version']
    # pre-7.0 XML fallback
    soup = BeautifulSoup(response.content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '=' * 80))
    try:
        version = soup.find('str', {'name': 'solr-spec-version'}).text
    except (AttributeError, TypeError) as _:
        qquit('UNKNOWN', 'failed to find parse Solr output. {0}\n{1}'\
              .format(support_msg_api(), traceback.format_exc()))
    return version
def check(self, client):
    """Check Docker container counts from `docker info`.

    One of self.running / self.paused / self.stopped / self.total selects
    which count is compared against thresholds (first truthy flag wins);
    with none set, all four counts are reported informationally. Builds
    self.msg with perfdata appended.
    """
    log.info('running Docker info')
    info = client.info()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(info))
    total = info['Containers']
    running = info['ContainersRunning']
    paused = info['ContainersPaused']
    stopped = info['ContainersStopped']
    self.msg = 'Docker '
    # (selector flag, human label, count, perfdata key) - first truthy flag wins
    selectors = [
        (self.running, 'running containers', running, 'running_containers'),
        (self.paused, 'paused containers', paused, 'paused_containers'),
        (self.stopped, 'stopped containers', stopped, 'stopped_containers'),
        (self.total, 'total containers', total, 'total_containers'),
    ]
    for selected, label, count, perf_key in selectors:
        if selected:
            self.msg += '{} = {}'.format(label, count)
            self.check_thresholds(count)
            self.msg += ' | {}={}{}'.format(perf_key, count, self.get_perf_thresholds())
            return
    # no selector: report all four counts without threshold evaluation
    self.msg += 'containers = {}, running containers = {}, paused containers = {}, stopped containers = {}'\
                .format(total, running, paused, stopped)
    self.msg += ' | containers={} running_containers={} paused_containers={} stopped_containers={}'\
                .format(total, running, paused, stopped)
def get_version(self):
    """Scrape the version from an Nginx server's 404 error page.

    Nginx has no version endpoint, so this deliberately requests a
    missing /version page (hence expecting HTTP 404) and parses the
    'nginx/x.y.z' server line out of the second <center> element of the
    error page, returning just the version number after the '/'.
    """
    log.info('querying %s', self.software)
    url = 'http://{host}:{port}/version'.format(host=self.host, port=self.port)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    # Special handling for Nginx, expecting 404 rather than usual 200
    if req.status_code != 404:
        qquit('CRITICAL', '{0} {1} (expecting 404)'.format(req.status_code, req.reason))
    soup = BeautifulSoup(req.content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '=' * 80))
    try:
        version = soup.findAll('center')[1].text
    # bug fix: also catch IndexError - findAll() returning fewer than two <center>
    # elements would otherwise crash with an unhandled exception instead of UNKNOWN
    except (AttributeError, TypeError, IndexError) as _:
        qquit('UNKNOWN', 'failed to find parse {0} output. {1}\n{2}'\
              .format(self.software, support_msg_api(), traceback.format_exc()))
    if '/' in version:
        version = version.split('/')[1]
    return version
def parse_builds(self, content):
    """Parse the Travis CI API JSON build list, returning up to self.num builds.

    Mode selection:
      - self.completed: collect builds in any finished state
      - self.failed:    collect failed/errored builds, warning once if a more
                        recent passing build is seen first
      - otherwise:      collect the most recent builds regardless of state

    Sanity-checks that build numbers are integers and strictly descending
    (the API returns most recent first). Quits UNKNOWN when the repo has no
    builds or none were collected.
    """
    log.debug('parsing build info')
    build = None
    collected_builds = []
    json_data = json.loads(content)
    if not json_data or \
       'builds' not in json_data or \
       not json_data['builds']:
        qquit('UNKNOWN', "no Travis CI builds returned by the Travis API." +
              " Either the specified repo '{0}' doesn't exist".format(self.repo) +
              " or no builds have happened yet?" +
              " Also remember the repo is case sensitive, for example 'harisekhon/nagios-plugins' returns this" +
              " blank build set whereas 'HariSekhon/nagios-plugins' succeeds" +
              " in returning latest builds information")
    builds = json_data['builds']
    # get latest finished failed build
    last_build_number = None
    found_newer_passing_build = False
    for _ in builds:
        # API returns most recent build first
        # extra check to make sure we're getting the very latest build number and API hasn't changed
        build_number = _['number']
        if not isInt(build_number):
            raise UnknownError('build number returned is not an integer!')
        build_number = int(build_number)
        if last_build_number is None:
            # seed one above the first build so the descending check below passes first time
            last_build_number = int(build_number) + 1
        if build_number >= last_build_number:
            raise UnknownError('build number returned is out of sequence, cannot be >= last build returned' + \
                               '{0}'.format(support_msg_api()))
        last_build_number = build_number
        if self.completed:
            if len(collected_builds) < self.num and _['state'] in ('passed', 'finished', 'failed', 'errored'):
                collected_builds.append(_)
        elif self.failed:
            if _['state'] == 'passed':
                # only warn once, and only if we haven't already found a failed build
                if not collected_builds and not found_newer_passing_build:
                    log.warning("found more recent successful build #%s with state = '%s'" + \
                                ", you may not need to debug this build any more", _['number'], _['state'])
                    found_newer_passing_build = True
            elif _['state'] in ('failed', 'errored'):
                if len(collected_builds) < self.num:
                    collected_builds.append(_)
                    # by continuing to iterate through the rest of the builds we can check
                    # their last_build numbers are descending for extra sanity checking
                    #break
        elif len(collected_builds) < self.num:
            collected_builds.append(_)
            # by continuing to iterate through the rest of the builds we can check
            # their last_build numbers are descending for extra sanity checking
            #break
    if not collected_builds:
        qquit('UNKNOWN', 'no recent builds found')
    if log.isEnabledFor(logging.DEBUG):
        for build in collected_builds:
            log.debug("build:\n%s", jsonpp(build))
    return collected_builds
def search(term, limit=25):
    """Query the DockerHub search API for *term* (up to *limit* results) and
    return the parsed JSON response, dying on any request/parse failure."""
    url = 'https://index.docker.io/v1/search?q={0}&n={1}'.format(urllib.quote_plus(term), limit)
    log.debug('GET %s' % url)
    verify = True
    # workaround for Travis CI and older pythons - we're not exchanging secret data so this is ok
    #if os.getenv('TRAVIS'):
    #    verify = False
    try:
        resp = requests.get(url, verify=verify)
    except requests.exceptions.RequestException as _:
        die(_)
    log.debug("response: %s %s", resp.status_code, resp.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, resp.content.strip(), '=' * 80)
    if resp.status_code != 200:
        die("%s %s" % (resp.status_code, resp.reason))
    if not isJson(resp.content):
        die('invalid non-JSON response from DockerHub!')
    if log.isEnabledFor(logging.DEBUG):
        print(jsonpp(resp.content), file=sys.stderr)
        print('=' * 80, file=sys.stderr)
    try:
        return json.loads(resp.content)
    except KeyError as _:
        die('failed to parse output from DockerHub (format may have changed?): {0}'.format(_))
def print_log(self, build=None, job_id=None):
    """Print the Travis CI log for a given job id, or for a job selected from
    the given build.

    When job_id is given it is printed directly. Otherwise the build's jobs
    are iterated: with self.failed set, the last failed finished job is
    chosen; otherwise the last job wins.

    Raises UnknownError if the build has no 'jobs' field or no job matches.
    """
    if job_id:
        self.print_job_log(job_id=job_id)
        log.info('=' * 80)
        log.info('end of log for job id %s', job_id)
        log.info('=' * 80 + '\n')
    else:
        if not build:
            code_error('no job id passed to print_log(), nor build to determine job from')
        log.info('getting job id for build #%s', build['number'])
        if 'jobs' not in build:
            # bugfix: call support_msg_api() - previously the function object
            # itself was formatted into the message instead of its result
            raise UnknownError('no jobs field found in build, {0}'.format(support_msg_api()))
        # bugfix: initialise job so an empty/non-matching jobs list raises the
        # intended UnknownError below instead of UnboundLocalError
        job = None
        for _ in build['jobs']:
            _id = _['id']
            url = 'https://api.travis-ci.org/jobs/{id}'.format(id=_id)
            req = self.request_handler.get(url)
            # if this raises ValueError it'll be caught by run handler
            job_data = json.loads(req.content)
            if log.isEnabledFor(logging.DEBUG):
                log.debug("job id %s status:\n%s", _id, jsonpp(job_data))
            if self.failed is True:
                # status of None / 1 / '1' indicates a failed job
                if job_data['state'] == 'finished' and job_data['status'] in (None, 1, '1'):
                    job = job_data
            else:
                job = job_data
        if not job:
            raise UnknownError('no job found in build {0}'.format(build['number']))
        self.print_job_log(job=job)
        log.info('=' * 80)
        log.info('end of log for build number #%s job id %s', build['number'], job['id'])
        log.info('=' * 80 + '\n')
def run(self):
    """Fetch the build list for the repo from the Travis CI API and hand the
    raw JSON to parse_results(), converting failures into plugin statuses."""
    url = 'https://api.travis-ci.org/repos/{repo}/builds'.format(repo=self.repo)
    log.debug('GET %s' % url)
    try:
        response = requests.get(url)
    except requests.exceptions.RequestException as _:
        raise CriticalError(_)
    log.debug("response: %s %s", response.status_code, response.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, response.content.strip(), '=' * 80)
    if response.status_code != 200:
        raise CriticalError("%s %s" % (response.status_code, response.reason))
    if log.isEnabledFor(logging.DEBUG):
        log.debug("\n{0}".format(jsonpp(response.content)))
    try:
        self.parse_results(response.content)
    except (KeyError, ValueError) as _:
        # keep only the final 'Type: message' line of the traceback
        exception = traceback.format_exc().split('\n')[-2]
        # this covers up the traceback info and makes it harder to debug
        #raise UnknownError('failed to parse expected json response from Travis CI API: {0}'.format(exception))
        qquit('UNKNOWN',
              'failed to parse expected json response from Travis CI API: {0}. {1}'.format(
                  exception, support_msg_api()))
def query(url):
    """GET one page of the DockerHub tags API.

    Returns a tuple of (list of tag names, next page URL or None).
    Dies on request failure, non-200 status or unparseable response.
    """
    log.debug('GET %s' % url)
    verify = True
    # workaround for Travis CI and older pythons - we're not exchanging secret data so this is ok
    #if os.getenv('TRAVIS'):
    #    verify = False
    try:
        req = requests.get(url, verify=verify)
    except requests.exceptions.RequestException as _:
        die(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        die("%s %s" % (req.status_code, req.reason))
    if not isJson(req.content):
        die('invalid non-JSON response from DockerHub!')
    if log.isEnabledFor(logging.DEBUG):
        print(jsonpp(req.content))
        print('=' * 80)
    try:
        data = json.loads(req.content)
        names = [_['name'] for _ in data['results']]
        # could perhaps stack overflow in some scenario
        # not as functional programming 'cool' but will do own tail recursion and just while loop instead
        #if 'next' in j and j['next']:
        #    tag_list += self.query(j['next'])
        return (names, data['next'])
    except KeyError as _:
        die('failed to parse output from DockerHub (format may have changed?): {0}'.format(_))
def check(self, client):
    """Check a Docker Swarm service: running replica count against thresholds,
    service mode, and creation/update times.

    Raises CriticalError if the service can't be fetched; goes critical when
    zero tasks are running, or when replica thresholds are supplied for a
    non-replicated (eg. global) service where they make no sense.
    """
    # services = client.services.list()
    # print(services)
    try:
        service = client.services.get(self.service)
    except docker.errors.APIError as _:
        raise CriticalError(_)
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(service.attrs))
    (mode, replicas, running_tasks, created, updated) = self.parse_service(service)
    self.msg = "Docker Swarm service '{}' replicas = {}".format(self.service, running_tasks)
    if mode == 'replicated':
        # only replicated services have a target replica count to show as running/target
        self.msg += "/{}".format(replicas)
    self.check_thresholds(running_tasks)
    if not running_tasks:
        # a service with no running tasks is always critical regardless of thresholds
        self.critical()
    if mode != 'replicated':
        self.msg += ", mode = '{}'".format(mode)
        # replica thresholds only apply to replicated mode - flag misuse loudly
        for _ in ('critical', 'warning'):
            thresholds = self.get_threshold(_, optional=True).thresholds
            if thresholds['upper'] or thresholds['lower']:
                self.critical()
                self.msg += ' (but --{} replica threshold expects replicated mode!)'.format(_)
                break
    self.check_created(created)
    self.check_updated(updated)
    self.msg += ' | running_replicas={}{}'.format(running_tasks, self.get_perf_thresholds('lower'))
def parse(self, stdout):
    """Parse `docker images` output for the configured image and delegate to
    check_id() / check_size() on the matching line.

    Raises CriticalError if the image isn't found, UnknownError if more than
    one tag matches or the CLI output format has changed.
    """
    if isPythonMinVersion(3):
        # NOTE(review): splits on a literal backslash-n two-char sequence -
        # presumably because str() of subprocess bytes output on Python 3
        # yields an escaped repr; confirm against the caller
        output = [_ for _ in str(stdout).split(r'\n') if _]
    else:
        output = [_ for _ in str(stdout).split('\n') if _]
    log.debug('output = %s', output)
    # need at least a header line plus one image line
    if len(output) < 2:
        raise CriticalError("docker image '{repo}' not found! Does not exist or has not been pulled yet?"\
                            .format(repo=self.docker_image))
    tags = set()
    for line in output[1:]:
        log.debug('line: %s', line)
        line_parts = line.split()
        if len(line_parts) > 1:
            # second column of `docker images` output is the tag
            tags.add(line_parts[1])
    tags = [tag for tag in tags if tag and tag != '<none>']
    tags = sorted(list(tags))
    if log.isEnabledFor(logging.DEBUG):
        for tag in tags:
            log.debug('found tag: %s', tag)
    if len(tags) > 1:
        raise UnknownError('too many results returned - did you forget to suffix a specific :tag to ' + \
                           '--docker-image? (eg. :latest, :1.1). The following tags were found: ' + \
                           ', '.join(tags) )
    header_line = output[0]
    docker_image_line = output[1]
    # sanity check the header ('IMAGE ID' spans 2 whitespace-split fields)
    # so a CLI format change fails loudly rather than mis-parsing
    image_header = ' '.join(header_line.split()[2:4])
    log.debug('image header column: %s', image_header)
    if image_header != 'IMAGE ID':
        raise UnknownError("3rd column in header '{0}' is not 'IMAGE ID' as expected, parsing failed!"\
                           .format(image_header))
    self.msg = "docker image '{repo}'".format(repo=self.docker_image)
    self.check_id(docker_image_line)
    self.check_size(docker_image_line)
def main(self):
    """Top-level plugin entry point: run the parent main() and translate every
    exception into the appropriate Nagios exit status via qquit().

    Known plugin exceptions map directly to their status; anything else is
    reported as UNKNOWN with a support message appended.
    """
    try:
        # Python 2.x
        super(NagiosPlugin, self).main()
        # Python 3.x
        # super().__init__()
        # redirect_stderr_stdout()
    except CriticalError as _:
        qquit('CRITICAL', _)
    except WarningError as _:
        qquit('WARNING', _)
    except UnknownError as _:
        qquit('UNKNOWN', _)
    except CodingError as _:
        qquit('UNKNOWN', 'Programming Error: {0}. {1}'.format(_, support_msg()))
    except Exception as _:  # pylint: disable=broad-except
        # catch-all boundary: any unexpected exception still exits UNKNOWN
        # rather than leaking a raw traceback to the monitoring system
        exception_type = type(_).__name__
        if log.isEnabledFor(logging.DEBUG):
            log.debug("exception: '%s'", exception_type)
            log.debug(traceback.format_exc())
        # exception type is part of the string, so don't double it up by prefixing it again
        #msg = 'Nagios Plugin Exception: {exception_type}: {msg}'\
        #      .format(exception_type=exception_type, msg=self.exception_msg())
        msg = 'Nagios Plugin Exception: {msg}'.format(msg=self.exception_msg())
        #msg = ', '.join([x.strip() for x in msg.split('\n')])
        # ', ' doesn't look nice for ':\n ...' => ':, ...' (snakebite OutOfNNException)
        #msg = '\t'.join([x.strip() for x in msg.split('\n')])
        #if self.options.verbose > 2:
        #    msg = type(_).__name__ + ': ' + msg
        msg += '. ' + support_msg()
        qquit('UNKNOWN', msg)
def run(self):
    """Check the state of a named Jenkins plugin: critical if not found, not
    enabled or not active; with --check-update, warn when an update is
    available.

    With --list-plugins, prints all installed plugin names (case-insensitive
    sort) and exits UNKNOWN.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        # timeout/3 leaves headroom for retries within the overall plugin timeout
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password,
                                 timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        log.debug('getting plugin info')
        #plugins = server.get_plugins()
        # deprecated but .get_plugins() output is not JSON serializable
        # so must use old deprecated method get_plugins_info() :-/
        plugins = server.get_plugins_info()
        query_time = time.time() - start_time
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(plugins))
    if self.list_plugins:
        plugin_list = []
        print('Jenkins plugins:\n')
        for _ in plugins:
            plugin_list.append(_['longName'])
        # case-insensitive sort for a human-friendly listing
        for _ in sorted(plugin_list, key=lambda s: s.lower()):
            print(_)
        sys.exit(ERRORS['UNKNOWN'])
    plugin = None
    # match the requested plugin by long name, case-insensitively
    for _ in plugins:
        if _['longName'].lower() == self.plugin.lower():
            plugin = _
            break
    if not plugin:
        raise CriticalError("plugin '{0}' not found. Try --list to see installed plugins".format(self.plugin))
    longname = plugin['longName']
    enabled = plugin['enabled']
    active = plugin['active']
    has_update = plugin['hasUpdate']
    self.msg += " plugin '{0}' enabled: {1}, active: {2}".format(longname, enabled, active)
    if not enabled or not active:
        self.critical()
    self.msg += ', update available: {0}'.format(has_update)
    if self.check_update and has_update:
        self.warning()
    self.msg += ' | query_time={0:.4f}s'.format(query_time)
def run(self):
    """Check a Jenkins node: critical if offline (with the offline cause in
    the message), and check the number of executors against thresholds.

    With --list-nodes, prints all node names and exits UNKNOWN.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        # timeout/3 leaves headroom within the overall plugin timeout
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password,
                                 timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        if self.list_nodes:
            log.debug('getting Jenkins nodes')
            nodes = server.get_nodes()
            log.debug('nodes: %s', nodes)
            print('Jenkins nodes:\n')
            for _ in nodes:
                print(_['name'])
            sys.exit(ERRORS['UNKNOWN'])
        # doesn't find 'master' node despite showing it in the list of nodes, jenkins puts brackets around master
        if self.node == 'master':
            self.node = '(master)'
        node = server.get_node_info(self.node)
    except jenkins.NotFoundException:
        raise CriticalError("node '{0}' not found, did you specify the correct name? See --list to see nodes"\
                            .format(self.node))
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    query_time = time.time() - start_time
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(node))
    offline = node['offline']
    offline_reason = node['offlineCauseReason']
    num_executors = node['numExecutors']
    # bugfix: validate BEFORE converting - previously int() ran first, so a
    # non-integer value raised ValueError before the isInt() guard could
    # produce the intended UnknownError
    if not isInt(num_executors):
        raise UnknownError('numExecutors returned non-integer! {0}'.format(support_msg_api()))
    num_executors = int(num_executors)
    if offline:
        self.critical()
        self.msg += 'offline: {0}'.format(offline_reason)
    else:
        self.msg += 'online'
    self.msg += ', num executors = {0}'.format(num_executors)
    self.check_thresholds(num_executors)
    self.msg += ' | num_executors={0:d}'.format(num_executors)
    self.msg += self.get_perf_thresholds(boundary='lower')
    self.msg += ' query_time={0:.4f}s'.format(query_time)
def run(self):
    """Check minutes since the last policy deployment via the PolicyManagement
    REST API, comparing the age in minutes against thresholds.

    A 400 'Bad Request' is treated as 'possibly no deployments yet'; any
    other non-200 is critical.
    """
    log.info("querying %s", self.software)
    url = "{protocol}://{host}:{port}/PolicyManagement/{api_version}/deployments".format(
        host=self.host, port=self.port, api_version=self.api_version, protocol=self.protocol
    )
    log.debug("GET %s", url)
    try:
        req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
    except requests.exceptions.RequestException as _:
        errhint = ""
        # NOTE(review): _.message is a Python 2-only attribute - this branch
        # would AttributeError on Python 3; confirm target interpreter
        if "BadStatusLine" in str(_.message):
            errhint = " (possibly connecting to an SSL secured port without using --ssl?)"
        elif self.protocol == "https" and "unknown protocol" in str(_.message):
            errhint = " (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)"
        qquit("CRITICAL", str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", "=" * 80, req.content.strip(), "=" * 80)
    if req.status_code == 400 and req.reason == "Bad Request":
        qquit(
            "CRITICAL",
            "{0}: {1} (possibly new install with no deployments yet?)".format(req.status_code, req.reason),
        )
    if req.status_code != 200:
        qquit("CRITICAL", "{0}: {1}".format(req.status_code, req.reason))
    try:
        json_list = json.loads(req.content)
        if log.isEnabledFor(logging.DEBUG):
            print(jsonpp(json_list))
            print("=" * 80)
        if not isList(json_list):
            raise ValueError("returned content is not a list")
        if not json_list:
            qquit("UNKNOWN", "no deployments found")
        # deployments appear most recent first
        last_deployment = json_list[0]
        userid = last_deployment["UserId"]
        description = last_deployment["Description"]
        hostname = last_deployment["HostName"]
        timestamp = last_deployment["timestamp"]
        # NOTE(review): %H (24-hour) combined with %p (AM/PM) looks suspect -
        # confirm the API's actual timestamp format
        last_deploy_datetime = datetime.strptime(timestamp, "%b %d, %Y %H:%M:%S %p")
    except (KeyError, ValueError) as _:
        qquit(
            "UNKNOWN",
            "error parsing output from {software}: {exception}: {error}. {support_msg}".format(
                software=self.software, exception=type(_).__name__, error=_, support_msg=support_msg_api()
            ),
        )
    timedelta = datetime.now() - last_deploy_datetime
    mins = int(int(timedelta.total_seconds()) / 60)
    self.msg = "{software} last deployment was at '{timestamp}', {mins} mins ago".format(
        software=self.software, timestamp=timestamp, mins=mins
    )
    self.check_thresholds(mins)
    if self.verbose:
        self.msg += " by user '{userid}', host = '{hostname}', description = '{description}'".format(
            userid=userid, hostname=hostname, description=description
        )
    self.msg += " | mins_since_last_deployment={mins}{thresholds}".format(
        mins=mins, thresholds=self.get_perf_thresholds(boundary="lower")
    )
def parse_output(self, content):
    """Parse the HBase Master UI base stats table, recording the servers with
    the fewest and most regions in self.server_min_regions and
    self.server_max_regions.

    Quits UNKNOWN if the table headers or values don't match the expected UI
    format (handles both HBase 1.1 4-column and 1.2 5-column layouts).
    """
    soup = BeautifulSoup(content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '=' * 80))
    # shorter to just catch NoneType attribute error when tag not found and returns None
    try:
        basestats = soup.find('div', {'id': 'tab_baseStats'})
        table = basestats.find('table')
        #for table in basestats:
        rows = table.findAll('tr')
        headers = rows[0].findAll('th')
        header_server = headers[0].get_text()
        header_regions = headers[3].get_text()
        wider_table = len(headers) > 4
        # HBase 1.1 in HDP 2.3: ServerName | Start time | Requests Per Second | Num. Regions
        # HBase 1.2 (Apache): ServerName | Start time | Version | Requests per Second | Num. Regions
        if wider_table:
            header_regions = headers[4].get_text()
        if header_server != 'ServerName':
            qquit('UNKNOWN', "Table headers in Master UI have changed" +
                  " (got {0}, expected 'ServerName'). ".format(header_server) + support_msg())
        if header_regions != 'Num. Regions':
            qquit('UNKNOWN', "Table headers in Master UI have changed" +
                  " (got {0}, expected 'Num. Regions'). ".format(header_regions) + support_msg())
        log.debug('%-50s\tnum_regions', 'server')
        for row in rows[1:]:
            # this can be something like:
            # 21689588ba40,16201,1473775984259
            # so don't apply isHost() validation because it'll fail FQDN / IP address checks
            cols = row.findAll('td')
            server = cols[0].get_text()
            if self.total_regex.match(server):
                # skip the aggregate 'Total' summary row
                continue
            num_regions = cols[3].get_text()
            if wider_table:
                num_regions = cols[4].get_text()
            if not isInt(num_regions):
                # bugfix: second fragment used index {1} with a single format
                # arg, raising IndexError (silently caught below) instead of
                # producing this message - use {0} in each fragment
                qquit('UNKNOWN', "parsing error - got '{0}' for num regions".format(num_regions) +
                      " for server '{0}', was expecting integer.".format(server) +
                      " UI format must have changed" + support_msg())
            num_regions = int(num_regions)
            log.debug('%-50s\t%s', server, num_regions)
            if self.server_min_regions[1] is None or num_regions < self.server_min_regions[1]:
                self.server_min_regions = (server, num_regions)
            if self.server_max_regions[1] is None or num_regions > self.server_max_regions[1]:
                self.server_max_regions = (server, num_regions)
    except (AttributeError, TypeError, IndexError):
        # bugfix: message read 'failed to find parse output'
        qquit('UNKNOWN', 'failed to parse output')
def exception_msg():
    """Return the current exception formatted as a string.

    Full traceback when $DEBUG is set or debug logging is enabled, otherwise
    just the final 'ExceptionType: message' line, with trailing newlines
    stripped.
    """
    if os.getenv('DEBUG') or log.isEnabledFor(logging.DEBUG):
        err = traceback.format_exc()
    else:
        # penultimate line of the traceback is the 'Type: message' summary
        err = traceback.format_exc().split('\n')[-2]
    return err.rstrip('\r\n')
def run(self):
    """Check that the configured Jenkins job exists via the Jenkins API,
    going critical if it does not.

    With --list-jobs, prints all job full names (recursing into folders)
    and exits UNKNOWN.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        # timeout/3 leaves headroom within the overall plugin timeout
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password,
                                 timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        if self.list_jobs:
            log.debug('getting jobs')
            #jobs = server.get_jobs()
            # recursively get all jobs
            jobs = server.get_all_jobs()
            # more efficient with many folders
            # jobs = server.run_script("""
            # import groovy.json.JsonBuilder;
            #
            # // get all projects excluding matrix configuration
            # // as they are simply part of a matrix project.
            # // there may be better ways to get just jobs
            # items = Jenkins.instance.getAllItems(AbstractProject);
            # items.removeAll {
            #   it instanceof hudson.matrix.MatrixConfiguration
            # };
            #
            # def json = new JsonBuilder()
            # def root = json {
            #   jobs items.collect {
            #     [
            #       name: it.name,
            #       url: Jenkins.instance.getRootUrl() + it.getUrl(),
            #       color: it.getIconColor().toString(),
            #       fullname: it.getFullName()
            #     ]
            #   }
            # }
            #
            # // use json.toPrettyString() if viewing
            # println json.toString()
            # """)
            print('Jenkins Jobs:\n')
            for job in jobs:
                print(job['fullname'])
            sys.exit(ERRORS['UNKNOWN'])
        log.debug('checking job exists')
        if server.job_exists(self.job):
            self.msg += 'exists'
        else:
            self.critical()
            self.msg += 'does not exist!'
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    query_time = time.time() - start_time
    self.msg += ' | query_time={0:.4f}s'.format(query_time)
def process_json(self, content):
    """Deserialize JSON *content*, store it on self.json_data, and hand it to
    parse_json(), converting parse failures into UnknownError."""
    try:
        parsed = json.loads(content)
        self.json_data = parsed
        if log.isEnabledFor(logging.DEBUG):
            log.debug('JSON prettified:\n\n%s\n%s', jsonpp(parsed), '='*80)
        return self.parse_json(parsed)
    except (KeyError, ValueError):
        #raise UnknownError('{0}: {1}. {2}'.format(type(_).__name__, _, support_msg_api()))
        raise UnknownError('{0}. {1}'.format(self.exception_msg(), support_msg_api()))
def parse(content):
    """Parse HBase RegionServer JMX JSON and return compactionQueueLength from
    the RegionServer Server mbean.

    Quits UNKNOWN if the JSON is invalid, the queue length isn't an integer,
    or the mbean isn't present (eg. pointed at the wrong daemon).
    """
    try:
        _ = json.loads(content)
        if log.isEnabledFor(logging.DEBUG):
            log.debug(jsonpp(_))
        compaction_queue_size = None
        for bean in _['beans']:
            if bean['name'] == 'Hadoop:service=HBase,name=RegionServer,sub=Server':
                if log.isEnabledFor(logging.DEBUG):
                    log.debug('found RegionServer section:')
                    log.debug(jsonpp(bean))
                compaction_queue_size = bean['compactionQueueLength']
                if not isInt(compaction_queue_size):
                    qquit('UNKNOWN', 'non-integer returned for compactionQueueLength! ' + support_msg_api())
                return compaction_queue_size
    except ValueError as _:
        # bugfix: str(_) - concatenating the exception object to a str
        # previously raised TypeError, masking the real parse error
        qquit('UNKNOWN', str(_) + ': failed to parse HBase Master jmx info. ' + support_msg_api())
    qquit('UNKNOWN', 'RegionServer mbean not found, double check this is pointing to an HBase RegionServer')
def check(self, client):
    """Check the number of Docker images against warning/critical thresholds."""
    log.info('running Docker info')
    info = client.info()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(info))
    image_count = info['Images']
    self.msg = 'Docker images = {}'.format(image_count)
    self.check_thresholds(image_count)
    self.msg += ' | docker_images={}{}'.format(image_count, self.get_perf_thresholds())
def process_results(self, json_data):
    """Walk DockerHub build results, skipping queued/in-progress builds and
    builds not matching --tag, and process the first eligible build.

    Returns True if a build was processed, False if none were eligible.
    """
    for build in json_data['results']:
        build_tag = build['dockertag_name']
        build_code = build['build_code']
        build_id = build['id']
        # Skip Queued / Building as we're only interested in latest completed build status
        if int(build['status']) in (0, 3):
            if log.isEnabledFor(logging.DEBUG):
                log.debug("skipping queued/in progress build tag '%s', id: %s, build_code: %s",
                          build_tag, build_id, build_code)
            continue
        # honour the --tag filter when one was given
        if self.tag and self.tag != build_tag:
            if log.isEnabledFor(logging.DEBUG):
                log.debug("skipping build tag '%s', id: %s, build_code: %s, does not match given --tag %s",
                          build_tag, build_id, build_code, self.tag)
            continue
        self.process_result(build)
        return True
    return False
def save_cluster(self, cluster, path=''):
    """Save the named cluster's blueprint to *path*, defaulting to a file
    named after the cluster under the blueprint directory."""
    if not path:
        path = os.path.normpath(os.path.join(self.blueprint_dir, cluster))
    blueprint_data = self.get_cluster_blueprint(cluster)
    # logged in save()
    # log.info("saving cluster '%s' blueprint to file '%s'" % (cluster, path))
    if log.isEnabledFor(logging.DEBUG):
        log.debug("cluster '%s' blueprint content = '%s'" % (cluster, blueprint_data))
    self.save(cluster, path, blueprint_data)
def save_blueprint(self, blueprint, path=''):
    """Save the named blueprint to *path*, defaulting to a file named after
    the blueprint under the blueprint directory."""
    if not path:
        path = os.path.normpath(os.path.join(self.blueprint_dir, blueprint))
    blueprint_data = self.get_blueprint(blueprint)
    # logged in save()
    # log.info("saving blueprint '%s' to file '%s" % (blueprint, path))
    if log.isEnabledFor(logging.DEBUG):
        log.debug("blueprint '%s' content = '%s'" % (blueprint, blueprint_data))
    self.save(blueprint, path, blueprint_data)
def get_mounts():
    """Return the lines of /proc/mounts, raising UnknownError if unreadable."""
    try:
        with open('/proc/mounts', 'r') as mounts_file:
            mount_lines = mounts_file.readlines()
            if log.isEnabledFor(logging.DEBUG):
                for mount_line in mount_lines:
                    log.debug('/proc/mounts: %s', mount_line.rstrip('\n'))
            return mount_lines
    except IOError as _:
        raise UnknownError(_)
def check(self, client):
    """Check whether this Docker daemon has Swarm mode enabled (non-empty
    swarm attrs); critical when not enabled."""
    log.info('running Docker info')
    swarm = client.swarm
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(swarm.attrs))
    if not swarm.attrs:
        self.msg = 'Docker Swarm not enabled'
        self.critical()
    else:
        self.msg = 'Docker Swarm enabled'
def req(self, url_suffix, data=None, request_type='GET'):
    """Make an HTTP request to the Ambari API and return the response body.

    GET by default; POST when *data* is given; PUT when request_type='PUT'.
    Always sends an X-Requested-By header (required by Ambari), substituting
    $USER when connecting as the generic 'admin' account. On non-200, dies
    with the API's own error message where one is available, otherwise
    raises via raise_for_status().
    """
    x_requested_by = self.user
    url = self.url_base + '/' + url_suffix.lstrip('/')
    if self.user == 'admin':
        x_requested_by = os.getenv('USER', self.user)
    headers = {'X-Requested-By': x_requested_by}
    log.debug('X-Requested-By: %s', x_requested_by)
    try:
        if request_type == 'PUT':
            log.debug('PUT %s', url)
            log.debug('PUTing data:\n\n%s' % data)
            result = requests.put(url, auth=(self.user, self.password), headers=headers, data=data)
        elif data:
            log.debug('POST %s', url)
            log.debug('POSTing data:\n\n%s' % data)
            result = requests.post(url, auth=(self.user, self.password), headers=headers, data=data)
        else:
            log.debug('GET %s', url)
            result = requests.get(url, auth=(self.user, self.password), headers=headers)
    except requests.exceptions.RequestException as _:
        die(_)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('headers:\n%s' % '\n'.join(['%(key)s:%(value)s' % locals()
                                              for (key, value) in result.headers.items()]))  # pylint: disable=unused-variable
        log.debug('status code: %s' % result.status_code)
        log.debug('body:\n%s' % result.text)
    if result.status_code != 200:
        try:
            message = result.json()['message']
            if message and message != result.reason:
                if log.isEnabledFor(logging.DEBUG):
                    # in debug mode raise so the full stack trace is visible
                    raise requests.exceptions.RequestException('%s %s: %s' \
                          % (result.status_code, result.reason, message))
                else:
                    die('{0} {1}: {2}'.format(result.status_code, result.reason, message))
        # raised by ['message'] field not existing
        except KeyError:
            pass
        # raised by .json() No JSON object could be decoded
        except ValueError:
            pass
        result.raise_for_status()
    return result.text
def send_blueprint(self, name, data):
    """Upload blueprint *data* under *name* to Ambari, warning first if a
    blueprint with that name already exists."""
    existing = self.get_blueprints()
    if name in existing:
        log.warn("blueprint with name '%s' already exists" % name)
    log.info("sending blueprint '%s'" % name)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("blueprint data = '%s'" % data)
    # not exposing this to user via switches - shouldn't be using this right now
    # return self.send('blueprints/%s?validate_topology=false' % name, data)
    return self.send('blueprints/%s' % name, data)
def parse_is_table_compacting(content):
    """Parse an HBase table JSP page to determine whether the table is
    currently compacting.

    Locates the 'Table Attributes' section, finds the 'Compaction' row and
    returns True if its state indicates a compaction in progress. Compaction
    state is 'NONE' when the table is enabled and idle, 'Unknown' when the
    table is disabled - both mean not compacting (returns False).
    """
    soup = BeautifulSoup(content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
    try:
        headings = soup.findAll('h2')
        for heading in headings:
            log.debug("checking heading '%s'", heading)
            if heading.get_text() == 'Table Attributes':
                log.debug('found Table Attributes section header')
                table = heading.find_next('table')
                log.debug('checking first following table')
                if log.isEnabledFor(logging.DEBUG):
                    log.debug('table:\n%s\n%s', table.prettify(), '='*80)
                rows = table.findChildren('tr')
                if len(rows) < 3:
                    qquit('UNKNOWN', 'parse error - less than the 3 expected rows in table attributes')
                col_names = rows[0].findChildren('th')
                if len(col_names) < 3:
                    qquit('UNKNOWN', 'parse error - less than the 3 expected column headings')
                first_col = col_names[0].get_text().strip()
                if first_col != 'Attribute Name':
                    # bugfix: include the header actually found - the message
                    # previously always showed an empty string for 'got'
                    qquit('UNKNOWN',
                          'parse error - expected first column header to be \'{0}\' but got \'{1}\' instead. '\
                          .format('Attribute Name', first_col) + support_msg())
                for row in rows[1:]:
                    cols = row.findChildren('td')
                    if len(cols) < 3:
                        qquit('UNKNOWN',
                              'parse error - less than the 3 expected columns in table attributes. ' +
                              support_msg())
                    if cols[0].get_text().strip() == 'Compaction':
                        compaction_state = cols[1].get_text().strip()
                        # NONE when enabled, Unknown when disabled
                        if compaction_state in ('NONE', 'Unknown'):
                            return False
                        return True
        qquit('UNKNOWN', 'parse error - failed to find Table Attributes section in JSP. ' + support_msg())
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse output. ' + support_msg())
def check(self, client):
    """Check that this Docker daemon is an active member of a Swarm.

    Raises CriticalError when not in a Swarm at all; goes critical when the
    local node state is anything other than 'active'.
    """
    log.info('running Docker info')
    info = client.info()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(info))
    swarm_info = info['Swarm']
    if 'Cluster' not in swarm_info:
        raise CriticalError('Docker is not a member of a Swarm')
    node_state = swarm_info['LocalNodeState']
    self.msg = 'Docker Swarm node state = {}'.format(node_state)
    if node_state != 'active':
        self.critical()
def list(self, url_suffix):
    """GET *url_suffix* from Ambari and return the response parsed as JSON,
    quitting CRITICAL if the request fails."""
    self.url = self.url_base + '/' + url_suffix
    try:
        response = self.get(url_suffix)
    except requests.exceptions.RequestException as _:
        err = 'failed to fetch list of Ambari Blueprints: %s' % _
        # log.critical(err)
        qquit('CRITICAL', err)
    parsed = json.loads(response)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("json_data = " + jsonpp(parsed))
    return parsed