def run(self):
    """Report how far the checked-out branch is ahead of / behind its remote.

    Reads the 'directory', 'remote' and 'no_fetch' options, optionally
    fetches from the remote, counts the commits on each side of the
    active branch and writes the result (plus perfdata) to ``self.msg``.

    Raises:
        CriticalError: if the directory is not a Git repository, if
            ``repo.active_branch`` raises TypeError (detached HEAD), or if
            a Git command fails.
    """
    self.no_args()
    checkout_dir = self.get_opt('directory')
    validate_directory(checkout_dir)
    checkout_dir = os.path.abspath(checkout_dir)
    self.remote = self.get_opt('remote')
    validate_chars(self.remote, 'remote', r'A-Za-z0-9_\.-')
    try:
        repo = git.Repo(checkout_dir)
    except git.InvalidGitRepositoryError:
        raise CriticalError(
            "directory '{}' does not contain a valid Git repository!".format(checkout_dir))
    try:
        if not self.get_opt('no_fetch'):
            log.info('fetching from remote repo: {}'.format(self.remote))
            repo.git.fetch(self.remote)
        branch = repo.active_branch
        log.info('active branch: %s', branch)
        behind_iter = repo.iter_commits(
            '{branch}..{remote}/{branch}'.format(remote=self.remote, branch=branch))
        ahead_iter = repo.iter_commits(
            '{remote}/{branch}..{branch}'.format(remote=self.remote, branch=branch))
        num_commits_behind = sum(1 for _commit in behind_iter)
        num_commits_ahead = sum(1 for _commit in ahead_iter)
    except TypeError as exc:
        # happens with detached HEAD checkout like Travis CI does
        raise CriticalError(exc)
    except git.GitCommandError as exc:
        # flatten multi-line stderr into a single status line
        raise CriticalError(', '.join(str(exc.stderr).split('\n')))
    self.msg = "git checkout branch '{}' is ".format(branch)
    in_sync = (num_commits_ahead + num_commits_behind == 0)
    if in_sync:
        self.ok()
        self.msg += 'up to date with'
    else:
        self.critical()
        self.msg += '{} commits behind, {} commits ahead of'.format(
            num_commits_behind, num_commits_ahead)
    self.msg += " remote '{}'".format(self.remote)
    self.msg += ' | commits_behind={};0;0 commits_ahead={};0;0'.format(
        num_commits_behind, num_commits_ahead)
def run(self):
    """Check whether an HBase table currently has a compaction in progress.

    Fetches ``table.jsp`` for the configured table from host:port, hands
    the page body to ``self.parse_is_table_compacting`` and sets
    ``self.msg`` (calling ``self.warning()`` when compacting).

    Raises:
        CriticalError: on request failure or any non-200 response.
    """
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    table = self.get_opt('table')
    validate_host(host)
    validate_port(port)
    validate_database_tablename(table)
    # the server raises a 500 error if the table doesn't exist
    url = 'http://%(host)s:%(port)s/table.jsp?name=%(table)s' % {
        'host': host,
        'port': port,
        'table': table,
    }
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as exc:
        raise CriticalError(exc)
    log.debug("response: %s %s", req.status_code, req.reason)
    separator = '='*80
    log.debug("content:\n%s\n%s\n%s", separator, req.content.strip(), separator)
    if req.status_code != 200:
        # status code is deliberately not checked here, only the body
        info = 'table not found' if 'TableNotFoundException' in req.content else ''
        raise CriticalError("%s %s %s" % (req.status_code, req.reason, info))
    compacting = self.parse_is_table_compacting(req.content)
    self.msg = 'HBase table \'{0}\' '.format(table)
    if not compacting:
        self.msg += 'has no compaction in progress'
    else:
        self.warning()
        self.msg += 'has compaction in progress'
def run(self):
    """Verify that the expected Git branch is checked out in a directory.

    Reads the 'directory' and 'branch' options, validates them, opens the
    repository and compares its active branch name with the expected one.

    Raises:
        CriticalError: if the directory is not a valid Git repository, if
            ``repo.active_branch`` raises TypeError (detached HEAD), or if
            a different branch is checked out.
    """
    self.no_args()
    directory = self.get_opt('directory')
    validate_directory(directory)
    directory = os.path.abspath(directory)
    expected_branch = self.get_opt('branch')
    if expected_branch is None:
        self.usage('expected branch not defined')
    if not re.match(r'^[\w-]+$', expected_branch):
        self.usage(
            'Invalid branch name given, must be alphanumeric, may contain dashes'
        )
    log_option('expected branch', expected_branch)
    # Report a non-repository directory as a CRITICAL check result instead of
    # letting the raw GitPython exception escape.
    try:
        repo = git.Repo(directory)
    except git.InvalidGitRepositoryError:
        raise CriticalError(
            "directory '{}' does not contain a valid Git repository!".format(directory))
    try:
        current_branch = repo.active_branch.name
    # happens with detached HEAD checkout like Travis CI does
    except TypeError as _:
        raise CriticalError(_)
    if current_branch == expected_branch:
        self.ok()
        self.msg = "branch '{0}' currently checked out in directory '{1}'".format(
            current_branch, directory)
    else:
        raise CriticalError(
            "branch '{0}' checked out, expecting '{1}' in directory '{2}'".format(
                current_branch, expected_branch, directory))
def run(self):
    """Fetch an HBase web UI page and report the version found in it.

    The page at ``http://host:port/`` + ``self.url_path`` is parsed with
    BeautifulSoup and passed to ``self.parse_version``. If an 'expected'
    regex option is given and does not match, ``self.critical()`` is called.

    Raises:
        CriticalError: on request failure or any non-200 response.
    """
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    validate_host(host)
    validate_port(port)
    expected = self.get_opt('expected')
    if expected is not None:
        validate_regex(expected)
        log.info('expected version regex: %s', expected)
    base_url = 'http://%(host)s:%(port)s/' % {'host': host, 'port': port}
    url = base_url + self.url_path
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as exc:
        raise CriticalError(exc)
    log.debug("response: %s %s", req.status_code, req.reason)
    separator = '=' * 80
    log.debug("content:\n%s\n%s\n%s", separator, req.content.strip(), separator)
    if req.status_code != 200:
        raise CriticalError("%s %s" % (req.status_code, req.reason))
    soup = BeautifulSoup(req.content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        # prettify() is only worth computing when debug output is enabled
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(
            soup.prettify(), '=' * 80))
    self.ok()
    version = self.parse_version(soup)
    self.msg = 'HBase {0} version = {1}'.format(self.role, version)
    version_mismatch = expected is not None and not re.search(expected, version)
    if version_mismatch:
        self.msg += " (expected '{0}')".format(expected)
        self.critical()
def parse_json(self, json_data):
    """Pick the Ranger policy to check out of a JSON response and check it.

    When ``self.policy_id`` is set, ``json_data`` itself is taken as the
    policy; otherwise (or when listing) the list under 'vXPolicies' is used
    and searched by ``self.policy_name``. With ``self.list_policies`` the
    policies are printed and the process exits with the UNKNOWN code.

    Raises:
        CriticalError: if the policy list is empty or no policy matches.
        UnknownError: if the policy list is not a list.
    """
    policy = None
    if self.policy_id:
        policy = json_data
        policy_list = [policy]
    if self.list_policies or not self.policy_id:
        policy_list = json_data['vXPolicies']
    if not policy_list:
        raise CriticalError(
            'Ranger policy not found! (check the --name is correct and that it really exists)'
        )
    host_info = " at '{0}:{1}'".format(self.host, self.port) if self.verbose else ''
    if not isList(policy_list):
        raise UnknownError(
            "non-list returned for json_data[vXPolicies] by Ranger{0}".format(host_info))
    if self.list_policies:
        self.print_policies(policy_list)
        sys.exit(ERRORS['UNKNOWN'])
    if policy is None and self.policy_name:
        # first policy whose name matches, or None when nothing matches
        policy = next(
            (candidate for candidate in policy_list
             if candidate['policyName'] == self.policy_name),
            None)
    # this won't apply when --policy-id is given as it's a targeted query
    # that will get 404 before this
    if not policy:
        not_found = "no matching policy found with name '{name}' in policy list ".format(
            name=self.policy_name)
        not_found += "returned by Ranger{host_info}".format(host_info=host_info)
        raise CriticalError(not_found)
    self.check_policy(policy)
def parse_json(self, json_data):
    """Pick the Ranger repository to check out of a JSON response and check it.

    When ``self.repository_id`` is set, ``json_data`` itself is taken as the
    repository; otherwise (or when listing) the list under 'vXRepositories'
    is used and searched by ``self.repository_name``. With
    ``self.list_repositories`` the repositories are printed and the process
    exits with the UNKNOWN code.

    Raises:
        CriticalError: if the repository list is empty or nothing matches.
        UnknownError: if the repository list is not a list.
    """
    repository = None
    if self.repository_id:
        repository = json_data
        repository_list = [repository]
    if self.list_repositories or not self.repository_id:
        repository_list = json_data['vXRepositories']
    if not repository_list:
        raise CriticalError(
            'Ranger repository not found! (check the --name is correct and that it really exists)')
    host_info = " at '{0}:{1}'".format(self.host, self.port) if self.verbose else ''
    if not isList(repository_list):
        raise UnknownError(
            "non-list returned for json_data[vXRepositories] by Ranger{0}".format(host_info))
    if self.list_repositories:
        self.print_repositories(repository_list)
        sys.exit(ERRORS['UNKNOWN'])
    if repository is None and self.repository_name:
        # first repository whose name matches, or None when nothing matches
        repository = next(
            (candidate for candidate in repository_list
             if candidate['name'] == self.repository_name),
            None)
    # this won't apply when --id is given as it's a targeted query that will
    # get 404 before this; it will only apply to --name based queries
    if not repository:
        not_found = "no matching repository found with name '{name}' in repository list ".format(
            name=self.repository_name)
        not_found += "returned by Ranger{host_info}".format(host_info=host_info)
        raise CriticalError(not_found)
    self.check_repository(repository)
def run(self):
    """Fetch the Travis CI builds list for ``self.repo`` and parse it.

    The response body is handed to ``self.parse_results``. A KeyError or
    ValueError from parsing is reported through ``qquit('UNKNOWN', ...)``
    using the last line of the traceback.

    Raises:
        CriticalError: on request failure or any non-200 response.
    """
    url = 'https://api.travis-ci.org/repos/{repo}/builds'.format(repo=self.repo)
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as exc:
        raise CriticalError(exc)
    log.debug("response: %s %s", req.status_code, req.reason)
    separator = '=' * 80
    log.debug("content:\n%s\n%s\n%s", separator, req.content.strip(), separator)
    if req.status_code != 200:
        raise CriticalError("%s %s" % (req.status_code, req.reason))
    if log.isEnabledFor(logging.DEBUG):
        log.debug("\n{0}".format(jsonpp(req.content)))
    try:
        self.parse_results(req.content)
    except (KeyError, ValueError):
        # format_exc() ends with a newline, so [-2] is the final
        # "ExceptionType: message" line of the traceback
        last_line = traceback.format_exc().split('\n')[-2]
        # raising UnknownError here would cover up the traceback info and
        # make it harder to debug, hence qquit
        qquit(
            'UNKNOWN',
            'failed to parse expected json response from Travis CI API: {0}. {1}'
            .format(last_line, support_msg_api()))
def run(self):
    """Connect to Docker, run ``self.check`` and append query timing perfdata.

    Uses ``self.base_url`` when set, otherwise the environment-based client.
    After the check, ``self.msg`` gets a ' |' perfdata separator and a
    ``query_time`` entry if they are not already present.

    Raises:
        CriticalError: on a Docker API error or a connection failure.
        UnknownError: on any other DockerException.
    """
    started = time.time()
    try:
        if not self.base_url:
            log.info('connecting to Docker via environment')
            client = docker.from_env()
        else:
            log.info('connecting to Docker via base url: %s', self.base_url)
            client = docker.DockerClient(
                base_url=self.base_url,
                timeout=max(self.timeout - 1, 1),
                tls=self.tls_config,
                user_agent='Hari Sekhon {}'.format(prog))
        # exception happens here
        self.check(client)
    except docker.errors.APIError as exc:
        raise CriticalError('Docker API call FAILED: {}'.format(exc))
    except requests.ConnectionError as exc:
        raise CriticalError('Docker connection failed: {}'.format(exc))
    except docker.errors.DockerException as exc:
        raise UnknownError(exc)
    query_time = time.time() - started
    has_perfdata_separator = '|' in self.msg
    if not has_perfdata_separator:
        self.msg += ' |'
    if ' query_time=' not in self.msg:
        self.msg += ' query_time={0:.4f}s'.format(query_time)
def run(self):
    """Verify that the expected Git branch is checked out in a directory.

    Reads and validates the 'directory' and 'branch' options, opens the
    repository and compares its active branch name with the expected one.

    Raises:
        CriticalError: if the directory is not a valid Git repository, if
            ``repo.active_branch`` raises TypeError (detached HEAD), or if
            a different branch is checked out.
    """
    self.no_args()
    checkout_dir = self.get_opt('directory')
    validate_directory(checkout_dir)
    checkout_dir = os.path.abspath(checkout_dir)
    expected_branch = self.get_opt('branch')
    if expected_branch is None:
        self.usage('expected branch not defined')
    valid_name = re.match(r'^[\w\s-]+$', expected_branch)
    if not valid_name:
        self.usage('Invalid branch name given, must be alphanumeric' +
                   ', may contain dashes and spaces for detached HEADs')
    log_option('expected branch', expected_branch)
    try:
        repo = git.Repo(checkout_dir)
    except git.InvalidGitRepositoryError:
        raise CriticalError(
            "directory '{}' does not contain a valid Git repository!".format(checkout_dir))
    try:
        current_branch = repo.active_branch.name
    except TypeError as exc:
        # happens with detached HEAD checkout like Travis CI does
        raise CriticalError(exc)
    if current_branch != expected_branch:
        mismatch = "git branch '{current_branch}' checked out".format(
            current_branch=current_branch)
        mismatch += ", expecting branch '{expected_branch}' in directory '{directory}'".format(
            expected_branch=expected_branch, directory=checkout_dir)
        raise CriticalError(mismatch)
    self.ok()
    self.msg = "git branch '{0}' currently checked out in directory '{1}'".format(
        current_branch, checkout_dir)
def parse_json(self, json_data):
    """Check an Atlas entity from a JSON list response.

    Three modes, driven by instance attributes:
      * ``self.list_entities``: print an ID/Type/Name table and exit with
        the UNKNOWN code.
      * ``self.entity_id``: the response must hold exactly one entity.
      * ``self.entity_name``: find the entity by name, then re-query by its
        id and let ``self.process_json`` handle that response.

    Raises:
        UnknownError: if ``json_data`` is not a list.
        CriticalError: if the list is empty, more than one entity is
            returned for an id, or the named entity is not found.
    """
    if not isList(json_data):
        raise UnknownError(
            'non-list returned by Atlas metadata server instance at {0}:{1}! {2}'.format(
                self.host, self.port, support_msg_api()))
    if len(json_data) < 1:
        raise CriticalError('no entities found!')

    if self.list_entities:
        row_format = '{0:40} {1:25} {2}'
        print('=' * 100)
        print(row_format.format('ID', 'Type', 'Name'))
        print('=' * 100)
        for item in json_data:
            item_name = self.get_key(item, 'name')
            item_id = self.get_key(item, 'id')
            item_type = self.get_key(item, 'type')
            print(row_format.format(item_id, item_type, item_name))
        sys.exit(ERRORS['UNKNOWN'])

    if self.entity_id:
        if len(json_data) > 1:
            raise CriticalError('more than one matching entity returned!')
        json_data = json_data[0]
    elif self.entity_name:
        for item in json_data:
            if self.entity_name != self.get_key(item, 'name'):
                continue
            # Recursion - a bit too clever but convenient: switch to an
            # id-based query and process that response instead
            self.entity_name = None
            self.entity_id = self.get_key(item, 'id')
            self.path += '/{0}'.format(self.entity_id)
            req = self.query()
            self.process_json(req.content)
            # escape recursion
            return
        raise CriticalError(
            "entity with name '{name}' not found!".format(name=self.entity_name))

    name = self.get_key(json_data, 'name')
    state = self.get_key(json_data, 'state')
    # 'path' is available for HDFS path entities but not DB, so it is not read
    entity_type = self.get_key(json_data, 'type')
    tags = []
    if 'trait_names' in json_data:
        tags = self.get_key(json_data, 'trait_names')
    version = self.get_key(json_data, 'version')
    modified_date = self.get_key(json_data, 'modified_time')

    self.msg = " '{name}' exists, state='{state}'".format(name=name, state=state)
    if state != 'ACTIVE':
        self.critical()
        self.msg += " (expected 'ACTIVE')"
    self.msg += ", type='{type}'".format(type=entity_type)
    self.check_type(entity_type)
    # tags are always shown, not only in verbose mode
    self.msg += ", tags='{tags}'".format(tags=','.join(tags))
    self.check_missing_tags(tags)
    if self.verbose:
        self.msg += ", modified_date='{modified_date}', version='{version}'".format(
            modified_date=modified_date,
            version=version
        )
def run(self):
    """Check a Jenkins node's online state and executor count.

    Connects to ``{protocol}://{host}:{port}``. With ``self.list_nodes`` the
    node names are printed and the process exits with the UNKNOWN code.
    Otherwise the info for ``self.node`` is fetched, the offline state and
    executor count are appended to ``self.msg`` along with perfdata, and the
    executor count is passed to ``self.check_thresholds``.

    Raises:
        CriticalError: if the node is not found or on any other
            JenkinsException.
        UnknownError: if 'numExecutors' is not an integer.
    """
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password,
                                 timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        if self.list_nodes:
            log.debug('getting Jenkins nodes')
            nodes = server.get_nodes()
            log.debug('nodes: %s', nodes)
            print('Jenkins nodes:\n')
            for _ in nodes:
                print(_['name'])
            sys.exit(ERRORS['UNKNOWN'])
        # doesn't find 'master' node despite showing it in the list of nodes,
        # jenkins puts brackets around master
        if self.node == 'master':
            self.node = '(master)'
        node = server.get_node_info(self.node)
    except jenkins.NotFoundException:
        raise CriticalError(
            "node '{0}' not found, did you specify the correct name? See --list to see nodes".format(
                self.node))
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    query_time = time.time() - start_time
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(node))
    offline = node['offline']
    offline_reason = node['offlineCauseReason']
    num_executors = node['numExecutors']
    # Validate BEFORE converting: int() on a bad value would raise
    # ValueError/TypeError and the intended UnknownError would never fire.
    if not isInt(num_executors):
        raise UnknownError('numExecutors returned non-integer! {0}'.format(
            support_msg_api()))
    num_executors = int(num_executors)
    if offline:
        self.critical()
        self.msg += 'offline: {0}'.format(offline_reason)
    else:
        self.msg += 'online'
    self.msg += ', num executors = {0}'.format(num_executors)
    self.check_thresholds(num_executors)
    self.msg += ' | num_executors={0:d}'.format(num_executors)
    self.msg += self.get_perf_thresholds(boundary='lower')
    self.msg += ' query_time={0:.4f}s'.format(query_time)
def check_job_launch_response_code(self, req):
    """Validate the HTTP status of a Travis job-launch response.

    202 is accepted silently. 409 means the job is already running: it is
    only logged when the 'ignore_running' option is set, otherwise it is
    raised. Any other status is raised.

    Raises:
        CriticalError: for 409 without 'ignore_running', and for every
            status other than 202 and 409.
    """
    status = req.status_code
    if status == 202:
        return
    error_message = self.parse_travis_error(req)
    if status != 409:
        raise CriticalError("{0} {1}: {2}".format(req.status_code, req.reason, error_message))
    error_message += " (if you've just retriggered this you can avoid this error " + \
                     "using the --ignore-running switch)"
    if self.get_opt('ignore_running'):
        log.info('job already running (ignoring)')
        return
    log.info('job already running')
    raise CriticalError('{0} {1}: {2}'.format(req.status_code, req.reason, error_message))
def parse_table(self, row):
    """Extract a (database, table) pair from one log row.

    The row is indexed through ``self.indicies``. If the row's own
    database/table fields are empty or do not match ``self.re_table``, the
    operation field is consulted: ignored operations are skipped, and for
    'QUERY' operations the pair is taken from
    ``self.get_db_table_from_resource(row)`` or, failing that, parsed out
    of the SQL text with ``self.re_select_from_table``.

    Returns:
        ``(database, table)`` lower-cased with backticks stripped, or
        ``(None, None)`` when the row is skipped or cannot be parsed.

    Raises:
        CriticalError: if the resulting database or table contains a
            space, or the table is the literal string 'null'.
    """
    #log.debug(row)
    user = row[self.indicies['user_index']]
    # 'hari.sekhon' in '*****@*****.**' in kerberos
    if self.re_ignored_users and self.re_ignored_users.match(user):
        log.debug('skipping row for ignored user %s: %s', user, row)
        return (None, None)
    database = row[self.indicies['database_index']].strip()
    table = row[self.indicies['table_index']].strip()
    # fall back to the operation / SQL text when the dedicated fields are unusable
    if not database or not table or not self.re_table.match('{}.{}'.format(database, table)):
        #log.info('table not found in fields for row: %s', row)
        operation = row[self.indicies['operation_index']]
        if operation in self.operations_to_ignore:
            return (None, None)
        elif operation == 'QUERY':
            query = row[self.indicies['sql_index']]
            # cheaper than re_ignore to pre-filter
            if query in ('GET_TABLES', 'GET_SCHEMAS', 'INVALIDATE METADATA'):
                return (None, None)
            (database, table) = self.get_db_table_from_resource(row)
            if database and table:
                pass
            else:
                log.debug('database/table not found in row: %s', row)
                log.debug('trying to parse: %s', query)
                match = self.re_select_from_table.search(query)
                if match:
                    table = match.group(1)
                    # a qualified name is split into its database and table parts
                    if '.' in table:
                        (database, table) = table.split('.', 1)
                # could use .search but all these seem to be at beginning
                elif self.re_ignore.match(query):
                    return (None, None)
                else:
                    log.warning('failed to parse database/table from query: %s', query)
                    return (None, None)
        else:
            log.debug('database/table not found in row and operation is not a query to parse: %s', row)
            return (None, None)
    # NOTE(review): this only bails out when BOTH are empty; if just one of
    # them is None the .lower() calls below would fail - confirm what
    # get_db_table_from_resource() can return.
    if not table and not database:
        return (None, None)
    table = table.lower().strip('`')
    database = database.lower().strip('`')
    if ' ' in table:
        raise CriticalError('table \'{}\' has spaces - parsing error for row: {}'.format(table, row))
    if ' ' in database:
        raise CriticalError('database \'{}\' has spaces - parsing error for row: {}'.format(database, row))
    if table == 'null':
        raise CriticalError('table == null - parsing error for row: {}'.format(row))
    return (database, table)
def check_response_code(self, req):
    """Translate a vhost-not-found response into a specific critical error.

    A 200 response is accepted. A 404 with reason 'Object Not Found' sets
    ``self.msg`` and raises; every other status is delegated to
    ``self.check_response_code_orig``.

    Raises:
        CriticalError: for a 404 'Object Not Found' response.
    """
    if req.status_code == 200:
        return
    vhost_missing = req.status_code == 404 and req.reason == 'Object Not Found'
    if vhost_missing:
        self.msg = "RabbitMQ vhost '{0}' not found!".format(self.vhost)
        raise CriticalError(self.msg)
    self.check_response_code_orig(req)
def parse(self, req):
    """Read a worker's last-heartbeat value from an HTML status page.

    Looks up the ``<th>`` cell whose text is ``self.node`` and takes the
    text of its next sibling as the heartbeat value, after verifying that
    the column following 'Node Name' is headed 'Last Heartbeat'.

    Raises:
        CriticalError: if the lookup fails with AttributeError/TypeError
            (worker not present in the page).
        UnknownError: if the heartbeat value is not an integer.
    """
    soup = BeautifulSoup(req.content, 'html.parser')
    last_heartbeat = None
    try:
        self.list_workers(soup)
        name_header = soup.find('th', text='Node Name')
        heartbeat_col_header = name_header.find_next_sibling().get_text()
        # make sure ordering of columns is as we expect so we're parsing
        # the correct number for heartbeat lag
        if heartbeat_col_header != 'Last Heartbeat':
            code_error(
                "heartbeat column header '{}' != Last Heartbeat".format(
                    heartbeat_col_header))
        node_cell = soup.find('th', text=self.node)
        last_heartbeat = node_cell.find_next_sibling().get_text()
        if last_heartbeat is None:
            raise AttributeError
    except (AttributeError, TypeError):
        raise CriticalError(
            "{0} worker '{1}' not found among list of live workers!".format(
                self.software, self.node))
    if not isInt(last_heartbeat):
        raise UnknownError(
            "last heartbeat '{0}' for node '{1}' is not an integer, possible parsing error! {2}".format(
                last_heartbeat, self.node, support_msg()))
    self.msg = "{0} worker '{1}' last heartbeat = {2} secs ago".format(
        self.software, self.node, last_heartbeat)
    self.check_thresholds(last_heartbeat)
    perf_thresholds = self.get_perf_thresholds()
    self.msg += ' | last_heartbeat={0}s{1}'.format(last_heartbeat, perf_thresholds)
def check(self, client):
    """Check the running replica count of a Docker Swarm service.

    Fetches ``self.service`` from the client, parses it with
    ``self.parse_service`` and builds ``self.msg`` with the running task
    count (plus ``/replicas`` in 'replicated' mode) and perfdata. Zero
    running tasks calls ``self.critical()``; so does a non-replicated mode
    combined with a configured critical or warning threshold.

    Raises:
        CriticalError: if the service lookup raises a Docker APIError.
    """
    try:
        service = client.services.get(self.service)
    except docker.errors.APIError as exc:
        raise CriticalError(exc)
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(service.attrs))
    (mode, replicas, running_tasks, created, updated) = self.parse_service(service)
    is_replicated = (mode == 'replicated')
    self.msg = "Docker Swarm service '{}' replicas = {}".format(
        self.service, running_tasks)
    if is_replicated:
        self.msg += "/{}".format(replicas)
    self.check_thresholds(running_tasks)
    if not running_tasks:
        self.critical()
    if not is_replicated:
        self.msg += ", mode = '{}'".format(mode)
        for level in ('critical', 'warning'):
            limits = self.get_threshold(level, optional=True).thresholds
            if not (limits['upper'] or limits['lower']):
                continue
            # replica thresholds only make sense for replicated services
            self.critical()
            self.msg += ' (but --{} replica threshold expects replicated mode!)'.format(
                level)
            break
    self.check_created(created)
    self.check_updated(updated)
    self.msg += ' | running_replicas={}{}'.format(
        running_tasks, self.get_perf_thresholds('lower'))
def run(self):
    """Count the running builds on a Jenkins server and check thresholds.

    Connects to ``{protocol}://{host}:{port}``, fetches the running builds,
    appends their count to ``self.msg``, passes it to
    ``self.check_thresholds`` and adds perfdata including the query time.

    Raises:
        CriticalError: on any JenkinsException.
    """
    server_url = '{proto}://{host}:{port}'.format(
        proto=self.protocol, host=self.host, port=self.port)
    debug_enabled = log.isEnabledFor(logging.DEBUG)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        started = time.time()
        server = jenkins.Jenkins(
            server_url,
            username=self.user,
            password=self.password,
            timeout=self.timeout / 3)
        if debug_enabled:
            log.debug('getting user')
            whoami = server.get_whoami()
            log.debug('connected as user %s', jsonpp(whoami))
        log.debug('fetching running builds')
        running_builds = server.get_running_builds()
        if debug_enabled:
            log.debug('%s', jsonpp(running_builds))
        running_build_count = len(running_builds)
        log.debug('running build count: %s', running_build_count)
        self.msg += '{0}'.format(running_build_count)
        self.check_thresholds(running_build_count)
    except jenkins.JenkinsException as exc:
        raise CriticalError(exc)
    query_time = time.time() - started
    self.msg += ' | running_build_count={0:d}'.format(running_build_count)
    self.msg += self.get_perf_thresholds()
    self.msg += ' query_time={0:.4f}s'.format(query_time)
def run(self):
    """Write a key, read it back via the parent ``run()``, then delete it.

    Each phase is timed: the write duration is stored in
    ``self._write_timing`` and the delete duration in
    ``self._delete_timing``. The read phase is delegated to the parent
    class's ``run()``.

    Raises:
        CriticalError: if the value read back differs from the value
            written.
    """
    ###############
    # == Write == #
    start = time.time()
    self.write()
    end = time.time()
    self._write_timing = end - start
    # log the write phase's own timing (previously logged the read timing
    # under a 'read' label)
    log.info('wrote in %s secs', self._write_timing)
    ##############
    # == Read == #
    # Python 2.x
    super(KeyWriteNagiosPlugin, self).run()
    # Python 3.x
    # super().run()
    if self._read_value != self._write_value:
        raise CriticalError(
            "read back value '%s' does not match written value '%s'!" %
            (self._read_value, self._write_value))
    ################
    # == Delete == #
    start = time.time()
    self.delete()
    end = time.time()
    self._delete_timing = end - start
    # log the delete phase's own timing (same copy/paste slip as above)
    log.info('deleted in %s secs', self._delete_timing)
def parse_json(self, json_data):
    """Compute the HDFS used-space imbalance across live datanodes.

    Reads the JSON string under ``json_data['beans'][0]['LiveNodes']``,
    finds the minimum and maximum 'usedSpace' across datanodes and reports
    ``(max - min) / max`` as a percentage, which is passed to
    ``self.check_thresholds``. In verbose mode, when the result is warning
    or critical, the datanodes whose used space relative to the maximum
    exceeds the warning upper threshold are listed.

    Raises:
        CriticalError: if no live datanodes are returned.
        UnknownError: if a 'usedSpace' value is not an integer, or an
            expected key is missing from the JSON.
    """
    log.info('parsing response')
    try:
        live_nodes = json_data['beans'][0]['LiveNodes']
        live_node_data = json.loads(live_nodes)
        num_datanodes = len(live_node_data)
        if num_datanodes < 1:
            raise CriticalError(
                "no live datanodes returned by JMX API from namenode '{0}:{1}'".format(
                    self.host, self.port))
        min_space = None
        max_space = 0
        for datanode in live_node_data:
            used_space = live_node_data[datanode]['usedSpace']
            if not isInt(used_space):
                raise UnknownError(
                    'usedSpace is not an integer! {0}'.format(
                        support_msg_api()))
            used_space = int(used_space)
            log.info("datanode '%s' used space = %s", datanode, used_space)
            if min_space is None or used_space < min_space:
                min_space = used_space
            if used_space > max_space:
                max_space = used_space
        # guard against dividing by zero when no datanode has used any space
        divisor = max_space
        if divisor < 1:
            log.info(
                'min used space < 1, resetting divisor to 1 (% will likely be very high)'
            )
            divisor = 1
        assert max_space >= min_space
        # float() forces true division so the ratio is not truncated to 0
        # under Python 2 integer division
        largest_imbalance_pc = float('{0:.2f}'.format(
            (float(max_space - min_space) / divisor) * 100))
        assert largest_imbalance_pc >= 0
        self.ok()
        self.msg = '{0}% HDFS imbalance on space used'.format(
            largest_imbalance_pc)
        self.check_thresholds(largest_imbalance_pc)
        self.msg += ' across {0:d} datanode{1}'.format(
            num_datanodes, plural(num_datanodes))
        if self.verbose:
            self.msg += ', min used space = {0}, max used space = {1}'.format(
                min_space, max_space)
        if self.verbose and (self.is_warning() or self.is_critical()):
            self.msg += ' [imbalanced nodes: '
            for datanode in live_node_data:
                # already validated as an integer in the loop above
                used_space = int(live_node_data[datanode]['usedSpace'])
                used_pc = float(used_space) / divisor * 100
                if used_pc > self.thresholds['warning']['upper']:
                    # NOTE(review): the original format spec '{1:.2f%}' was
                    # invalid and raised ValueError; printing the compared
                    # percentage is assumed to be the intent - confirm.
                    self.msg += '{0}({1:.2f}%),'.format(datanode, used_pc)
            self.msg = self.msg.rstrip(',') + ']'
        self.msg += " | 'HDFS imbalance on space used %'={0}".format(
            largest_imbalance_pc)
        self.msg += self.get_perf_thresholds()
        self.msg += " num_datanodes={0}".format(num_datanodes)
        self.msg += " min_used_space={0}".format(min_space)
        self.msg += " max_used_space={0}".format(max_space)
    except KeyError as _:
        raise UnknownError(
            "failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}".format(
                self.host, self.port, _, support_msg_api()))
def run(self):
    """Run the parent check, converting pika errors into CriticalError.

    Raises:
        CriticalError: carrying ``self.exception_msg()`` when the parent
            ``run()`` raises one of the pika exceptions listed below.
    """
    try:
        super(CheckRabbitMQ, self).run()
    except (pika.exceptions.AMQPError,
            pika.exceptions.ChannelError,
            pika.exceptions.RecursionError):
        raise CriticalError(self.exception_msg())
def parse(self, stdout):
    """Parse tabular image-listing output for ``self.docker_image``.

    The first non-empty line is treated as the header and the second as the
    image line, which is passed to ``self.check_id`` and
    ``self.check_size``. The second column of every data line is collected
    as a tag.

    Raises:
        CriticalError: if there are fewer than two non-empty lines.
        UnknownError: if more than one distinct tag is found, or the
            header's 3rd/4th words are not 'IMAGE ID'.
    """
    # under Python 3 str() of the captured output contains a literal
    # backslash-n rather than real newlines, hence the raw-string separator
    line_separator = r'\n' if isPythonMinVersion(3) else '\n'
    output = [chunk for chunk in str(stdout).split(line_separator) if chunk]
    log.debug('output = %s', output)
    if len(output) < 2:
        raise CriticalError(
            "docker image '{repo}' not found! Does not exist or has not been pulled yet?".format(
                repo=self.docker_image))
    found_tags = set()
    for line in output[1:]:
        log.debug('line: %s', line)
        columns = line.split()
        if len(columns) > 1:
            found_tags.add(columns[1])
    tags = sorted(tag for tag in found_tags if tag and tag != '<none>')
    if log.isEnabledFor(logging.DEBUG):
        for tag in tags:
            log.debug('found tag: %s', tag)
    if len(tags) > 1:
        raise UnknownError(
            'too many results returned - did you forget to suffix a specific :tag to ' +
            '--docker-image? (eg. :latest, :1.1). The following tags were found: ' +
            ', '.join(tags))
    header_line, docker_image_line = output[0], output[1]
    image_header = ' '.join(header_line.split()[2:4])
    log.debug('image header column: %s', image_header)
    if image_header != 'IMAGE ID':
        raise UnknownError(
            "3rd column in header '{0}' is not 'IMAGE ID' as expected, parsing failed!".format(
                image_header))
    self.msg = "docker image '{repo}'".format(repo=self.docker_image)
    self.check_id(docker_image_line)
    self.check_size(docker_image_line)
def run(self):
    """Run the parent check, converting any KafkaError into CriticalError.

    Raises:
        CriticalError: carrying ``self.exception_msg()`` when the parent
            ``run()`` raises KafkaError.
    """
    try:
        super(CheckKafka, self).run()
    except KafkaError:
        # exception_msg() is used rather than the exception object itself
        err = self.exception_msg()
        raise CriticalError(err)
def run(self):
    """Run ``consul version`` and check the reported version.

    The command output (stdout and stderr combined) is scanned line by
    line with ``self.version_regex``. If an 'expected' regex option is
    given and does not match the version, ``self.critical()`` is called.

    Raises:
        CriticalError: if the command exits non-zero or its output
            contains 'Error'.
        UnknownError: if no version is found in the output or the version
            is not recognised by ``isVersion``.
    """
    expected = self.get_opt('expected')
    if expected is not None:
        validate_regex(expected)
        log.info('expected version regex: %s', expected)
    cmd = 'consul version'
    log.debug('cmd: ' + cmd)
    # universal_newlines=True makes the pipe return text on both Python 2
    # and 3. With a bytes pipe on Python 3, "'Error' in stdout" raises
    # TypeError and str(stdout) yields a b'...' repr that can never be split
    # into real lines for the version regex.
    proc = subprocess.Popen(cmd.split(),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
    (stdout, _) = proc.communicate()
    log.debug('stdout: ' + str(stdout))
    returncode = proc.wait()
    log.debug('returncode: ' + str(returncode))
    if returncode != 0 or (stdout is not None and 'Error' in stdout):
        raise CriticalError('consul returncode: {0}, output: {1}'.format(
            returncode, stdout))
    version = None
    for line in str(stdout).split('\n'):
        match = self.version_regex.match(line)
        if match:
            version = match.group(1)
    if not version:
        raise UnknownError(
            'Consul version not found in output. Consul output may have changed. {0}'
            .format(support_msg()))
    if not isVersion(version):
        raise UnknownError(
            'Consul version unrecognized \'{0}\'. {1}'.format(
                version, support_msg()))
    self.ok()
    self.msg = 'Consul version = {0}'.format(version)
    if expected is not None and not re.search(expected, version):
        self.msg += " (expected '{0}')".format(expected)
        self.critical()
def run(self):
    """Count offline Jenkins nodes and check the count against thresholds.

    Connects to ``{protocol}://{host}:{port}``, fetches all nodes, counts
    those whose 'offline' entry is truthy, passes the count to
    ``self.check_thresholds`` and appends a summary plus perfdata to
    ``self.msg``.

    Raises:
        CriticalError: on any JenkinsException.
    """
    server_url = '{proto}://{host}:{port}'.format(
        proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        started = time.time()
        server = jenkins.Jenkins(
            server_url,
            username=self.user,
            password=self.password,
            timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            whoami = server.get_whoami()
            log.debug('connected as user %s', jsonpp(whoami))
        log.debug('getting Jenkins nodes')
        nodes = server.get_nodes()
        log.debug('nodes: %s', nodes)
        node_count = len(nodes)
        log.debug('node count: %s', node_count)
        offline_nodes = sum(1 for node in nodes if node['offline'])
        self.msg += '{0} offline node{1}'.format(offline_nodes, plural(offline_nodes))
        self.check_thresholds(offline_nodes)
        self.msg += ' out of {0} node{1}'.format(node_count, plural(node_count))
    except jenkins.JenkinsException as exc:
        raise CriticalError(exc)
    query_time = time.time() - started
    self.msg += ' | offline_nodes={0:d}'.format(offline_nodes)
    self.msg += self.get_perf_thresholds()
    self.msg += ' node_count={0:d}'.format(node_count)
    self.msg += ' query_time={0:.4f}s'.format(query_time)
def process_event_selectors(self, client, trail_list):
    """Tally event selectors across cloud trails and flag incomplete ones.

    For each trail in ``trail_list`` (restricted to ``self.trail_name``
    when that is set) the event selectors are fetched via
    ``client.get_event_selectors``. ``self.warning()`` is called when a
    trail has no selectors, when not all of a trail's selectors include
    management events or have ReadWriteType 'All', and when no selectors
    exist at all.

    Returns:
        Tuple ``(total_event_selectors, num_management, num_readwrite_all,
        trails_without_selectors)`` with totals across the examined trails.

    Raises:
        CriticalError: if ``self.trail_name`` is set but no trail has that
            name.
    """
    total_event_selectors = 0
    num_management = 0
    num_readwrite_all = 0
    trails_without_selectors = 0
    found = False
    for trail in trail_list:
        name = trail['Name']
        if self.trail_name and self.trail_name != name:
            continue
        found = True
        trail_info = client.get_event_selectors(TrailName=name)
        log.debug('%s', jsonpp(trail_info))
        event_selectors = trail_info['EventSelectors']
        num_event_selectors = len(event_selectors)
        total_event_selectors += num_event_selectors
        if num_event_selectors < 1:
            # log the trail's name rather than the whole trail dict, and use
            # warning() since Logger.warn is deprecated
            log.warning('cloud trail %s has no event selectors', name)
            self.warning()
            trails_without_selectors += 1
        trail_management = 0
        trail_readwrite_all = 0
        for event_selector in event_selectors:
            if event_selector['IncludeManagementEvents']:
                trail_management += 1
            if event_selector['ReadWriteType'].lower() == 'all':  # All
                trail_readwrite_all += 1
        num_management += trail_management
        num_readwrite_all += trail_readwrite_all
        # Compare per-trail counts: the running totals include earlier
        # trails and would mask a non-compliant later trail.
        if trail_management < num_event_selectors or \
           trail_readwrite_all < num_event_selectors:
            self.warning()
    if self.trail_name and not found:
        raise CriticalError('cloud trail \'{}\' not found'.format(self.trail_name))
    if total_event_selectors == 0:
        self.warning()
    return (total_event_selectors, num_management, num_readwrite_all, trails_without_selectors)
def run(self):
    """Check that a Jenkins plugin is enabled and active.

    Connects to ``{protocol}://{host}:{port}`` and fetches plugin info.
    With ``self.list_plugins`` the plugin long names are printed sorted
    case-insensitively and the process exits with the UNKNOWN code.
    Otherwise the plugin whose long name equals ``self.plugin``
    (case-insensitively) is reported; ``self.critical()`` is called if it
    is not enabled or not active, ``self.warning()`` if ``self.check_update``
    is set and an update is available.

    Raises:
        CriticalError: on any JenkinsException or if the plugin is not
            found.
    """
    server_url = '{proto}://{host}:{port}'.format(
        proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        started = time.time()
        server = jenkins.Jenkins(
            server_url,
            username=self.user,
            password=self.password,
            timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            whoami = server.get_whoami()
            log.debug('connected as user %s', jsonpp(whoami))
        log.debug('getting plugin info')
        # get_plugins_info() is deprecated but .get_plugins() output is not
        # JSON serializable, so the old method must be used :-/
        plugins = server.get_plugins_info()
        query_time = time.time() - started
    except jenkins.JenkinsException as exc:
        raise CriticalError(exc)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(plugins))
    if self.list_plugins:
        print('Jenkins plugins:\n')
        long_names = [entry['longName'] for entry in plugins]
        for long_name in sorted(long_names, key=lambda s: s.lower()):
            print(long_name)
        sys.exit(ERRORS['UNKNOWN'])
    plugin = None
    for entry in plugins:
        if entry['longName'].lower() == self.plugin.lower():
            plugin = entry
            break
    if not plugin:
        raise CriticalError(
            "plugin '{0}' not found. Try --list to see installed plugins".format(self.plugin))
    longname = plugin['longName']
    enabled = plugin['enabled']
    active = plugin['active']
    has_update = plugin['hasUpdate']
    self.msg += " plugin '{0}' enabled: {1}, active: {2}".format(longname, enabled, active)
    if not (enabled and active):
        self.critical()
    self.msg += ', update available: {0}'.format(has_update)
    if self.check_update and has_update:
        self.warning()
    self.msg += ' | query_time={0:.4f}s'.format(query_time)
def run(self):
    """Check a Git checkout for changed, untracked or staged-only changes.

    Counts the repository's untracked files and the files in the index
    diff against the working tree (excluding untracked ones). Any of these
    calls ``self.critical()``; if there are none but ``repo.is_dirty()`` is
    true, uncommitted staged changes are reported as critical instead.
    Counts are appended to ``self.msg`` as perfdata.

    Raises:
        CriticalError: if the directory is not a valid Git repository, or
            on InvalidGitRepositoryError/TypeError while inspecting it.
    """
    self.no_args()
    checkout_dir = self.get_opt('directory')
    validate_directory(checkout_dir)
    checkout_dir = os.path.abspath(checkout_dir)
    try:
        repo = git.Repo(checkout_dir)
    except InvalidGitRepositoryError:
        raise CriticalError(
            "directory '{}' does not contain a valid Git repository!".format(checkout_dir))
    try:
        untracked_files = repo.untracked_files
        num_untracked_files = len(untracked_files)
        diffed_paths = [item.a_path for item in repo.index.diff(None)]
        changed_files = [
            path for path in diffed_paths if path not in untracked_files
        ]
        num_changed_files = len(changed_files)
    except (InvalidGitRepositoryError, TypeError) as exc:
        raise CriticalError(exc)
    self.msg = '{} changed file{}'.format(num_changed_files, plural(num_changed_files))
    self.msg += ', {} untracked file{}'.format(num_untracked_files, plural(num_untracked_files))
    self.msg += " in Git checkout at directory '{}'".format(checkout_dir)
    uncommitted_staged_changes = 0
    if changed_files or untracked_files:
        self.critical()
        if self.verbose and changed_files:
            self.msg += ' (changed files: {})'.format(', '.join(changed_files))
        if self.verbose and untracked_files:
            self.msg += ' (untracked files: {})'.format(', '.join(untracked_files))
    elif repo.is_dirty():
        # nothing changed or untracked in the working tree, yet the repo is
        # dirty: the remaining changes are staged but not committed
        self.msg += ', uncommitted staged changes detected!'
        self.critical()
        uncommitted_staged_changes = 1
    self.msg += ' | changed_files={};0;0 untracked_files={};0;0'.format(
        num_changed_files, num_untracked_files)
    self.msg += ' uncommitted_staged_changes={};0;0'.format(uncommitted_staged_changes)
def parse_json(self, json_data):
    """Find the most recent finished Yarn app matching ``self.app`` and check it.

    Walks ``json_data['apps']['app']`` in order, skipping apps in state
    RUNNING or ACCEPTED, and passes the first app whose name matches the
    case-insensitive regex ``self.app`` to ``self.check_app``. With
    ``self.list_apps`` the apps are printed and the process exits with the
    UNKNOWN code. With ``self.warn_on_dup_app`` the number of apps whose
    name matches the same regex is counted and reported when above one.

    Raises:
        CriticalError: if no apps are returned or none matches.
        UnknownError: if the app list is not a list or exceeds
            ``self.limit``.
    """
    apps = json_data['apps']
    if not apps:
        raise CriticalError('no completed Yarn apps found')
    app_list = apps['app']
    host_info = ''
    if self.verbose:
        host_info = " at '{0}:{1}'".format(self.host, self.port)
    if not isList(app_list):
        raise UnknownError(
            "non-list returned for json_data[apps][app] by Yarn Resource Manager{0}".format(
                host_info))
    num_apps = len(app_list)
    log.info(
        "processing {0:d} running apps returned by Yarn Resource Manager{1}"
        .format(num_apps, host_info))
    if num_apps > self.limit:
        raise UnknownError('num_apps {} > limit {}'.format(
            num_apps, self.limit))
    if self.list_apps:
        self.print_apps(app_list)
        sys.exit(ERRORS['UNKNOWN'])
    matched_app = None
    regex = re.compile(self.app, re.I)
    for app in app_list:
        state = app['state']
        # only finished apps are of interest
        if state in ('RUNNING', 'ACCEPTED'):
            continue
        if regex.search(app['name']):
            matched_app = app
            break
    if not matched_app:
        raise CriticalError(
            "no finished app/job found with name matching '{app}' in list of last {limit} apps ".format(
                app=self.app, limit=self.limit) +
            "returned by Yarn Resource Manager{host_info}".format(host_info=host_info))
    log.info('found matching app:\n\n%s\n', jsonpp(matched_app))
    elapsed_time = self.check_app(matched_app)
    if self.warn_on_dup_app:
        log.info('checking for duplicate apps matching the same regex')
        # Use search(), exactly as the selection above does: match() only
        # matches at the start of the name, so un-anchored patterns would
        # never count duplicates.
        count = sum(1 for app in app_list if regex.search(app['name']))
        if count > 1:
            self.msg += ', {0} DUPLICATE APPS WITH MATCHING NAMES DETECTED!'.format(
                count)
    self.msg += ' | app_elapsed_time={0}{1}'.format(
        elapsed_time, self.get_perf_thresholds())
def process_args(self):
    """Read and validate the Kafka check's command line options.

    Populates ``self.brokers``, ``self.timeout_ms``, ``self.topic``,
    ``self.partition``, ``self.topic_partition`` and ``self.acks``. The
    'list_topics' and 'list_partitions' options print their listing and
    exit with the UNKNOWN code. Finishes with ``self.validate_thresholds()``.

    Raises:
        CriticalError: carrying ``self.exception_msg()`` when a KafkaError
            is raised while listing topics or partitions.
    """
    self.brokers = self.get_opt('brokers')
    # TODO: add broker list validation back in
    # validate_hostport(self.brokers)
    log_option('brokers', self.brokers)
    self.timeout_ms = max((self.timeout * 1000 - 1000) / 2, 1000)
    try:
        list_topics = self.get_opt('list_topics')
        list_partitions = self.get_opt('list_partitions')
        if list_topics:
            self.print_topics()
            sys.exit(ERRORS['UNKNOWN'])
        self.topic = self.get_opt('topic')
    except KafkaError:
        raise CriticalError(self.exception_msg())
    if self.topic:
        validate_chars(self.topic, 'topic', 'A-Za-z-')
    elif not (list_topics or list_partitions):
        self.usage('--topic not specified')
    if list_partitions:
        try:
            if self.topic:
                self.print_topic_partitions(self.topic)
            else:
                for topic in self.get_topics():
                    self.print_topic_partitions(topic)
            sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())
    self.partition = self.get_opt('partition')
    # technically optional, will hash to a random partition, but need to
    # know which partition to get offset, so it is always validated
    validate_int(self.partition, "partition", 0, 10000)
    self.topic_partition = TopicPartition(self.topic, self.partition)
    self.acks = self.get_opt('acks')
    try:
        self.acks = int(self.acks)
    except ValueError:
        # non-numeric acks values are kept as given
        pass
    log_option('acks', self.acks)
    self.validate_thresholds()
def tmp(req):
    """Raise CriticalError for any non-200 response.

    The error text is prefixed with ``msg`` (taken from the enclosing
    scope) and, when the response content is a non-empty single-line
    string, suffixed with that content.

    Raises:
        CriticalError: if ``req.status_code`` is not 200.
    """
    if req.status_code == 200:
        return
    err = ''
    single_line_body = (req.content and isStr(req.content)
                        and len(req.content.split('\n')) < 2)
    if single_line_body:
        err += ': ' + req.content
    raise CriticalError("{0}: '{1}' {2}{3}".format(
        msg, req.status_code, req.reason, err))