def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
    self.branches_dockerfile_checked.add(branch)
    self.dockerfiles_checked.add(filename)
    if arg_var:
        log.debug("found arg '%s'", arg_var)
        arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
    else:
        arg_version = "'{0}'".format(found_version)
    #log.debug("arg '%s' matches branch base '%s'", argversion.group(1), branch_base)
    log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
              filename, branch, branch_version)
    if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
        die("unrecognized branch version '{0}' for branch_base '{1}'"
            .format(branch_version, branch_base))
    #if branch_version == found_version or branch_version == found_version.split('.', 2)[0]:
    if found_version[0:len(branch_version)] == branch_version:
        log.info("{0} version '{1}' matches {2}".
                 format(self.valid_git_branches_msg, branch_version, arg_version))
    else:
        log.error("{0} version '{1}' vs Dockerfile {2}".
                  format(self.invalid_git_branches_msg, branch_version, arg_version))
        self.dockerfiles_failed += 1
        self.branches_failed.add(branch)
        return False
    return True
def __parse_args__(self):
    try:
        (self.options, self.args) = self.__parser.parse_args()
    # I don't agree with zero exit code from OptionParser for help/usage,
    # and want UNKNOWN not CRITICAL(2) for switch mis-usage...
    except SystemExit:  # pragma: no cover
        sys.exit(ERRORS['UNKNOWN'])
    if self.options.help:  # pragma: no cover
        self.usage()
    if self.options.version:  # pragma: no cover
        print('%(version)s' % self.__dict__)
        sys.exit(ERRORS['UNKNOWN'])
    if 'timeout' in dir(self.options):
        self.timeout = self.get_opt('timeout')
    env_verbose = os.getenv('VERBOSE')
    if isInt(env_verbose):
        env_verbose = int(env_verbose)  # cast so the comparison below is numeric rather than str vs int
        if env_verbose > self.verbose:
            log.debug('environment variable $VERBOSE = %s, increasing verbosity', env_verbose)
            self.verbose = env_verbose
    elif env_verbose is None:
        pass
    else:
        log.warning("$VERBOSE environment variable is not an integer ('%s')", env_verbose)
    self.parse_args()
    return self.options, self.args
def consume(self):
    self.check_connection()
    self.check_channel()
    def connection_timeout_handler():
        raise CriticalError("unique message not returned on queue '{queue}' within {secs:.2f} secs"\
                            .format(queue=self.queue, secs=self.timeout / 3) + \
                            ", consumer timed out while consuming messages from {name} broker '{host}:{port}'"\
                            .format(name=self.name, host=self.host, port=self.port))
    self.conn.add_timeout(self.timeout / 3, connection_timeout_handler)
    # don't re-declare, queue should still exist otherwise error out
    #channel.queue_declare(queue = 'hello')
    # don't ack as messages could stay in queue indefinitely
    self.consumer_tag = self.channel.basic_consume(self.consumer_callback,
                                                   queue=self.queue,
                                                   # let broker autogenerate consumer_tag
                                                   # consumer_tag = self.consumer_tag),
                                                   no_ack=self.no_ack
                                                  )
    # could also use non-callback mechanism - generator that yields tuples (method, properties, body)
    # requires self.channel.cancel() from within loop
    # self.channel.consume(self.queue,
    #                      no_ack = True,
    #                      exclusive = True,
    #                      arguments = None,
    #                      inactivity_timeout = self.timeout/3)
    log.debug('start consuming')
    self.channel.start_consuming()
    # could instead use basic_get to return single message
    # self.channel.basic_get(queue = self.queue, no_ack = True)
    log.info('closing connection to broker')
    self.conn.close(reply_code=200, reply_text='Normal shutdown')
    return self.consumed_message
def check_http(self, host, port, url_path=''):
    if not isStr(url_path):
        url_path = ''
    url = '{protocol}://{host}:{port}/{url_path}'.format(protocol=self.protocol,
                                                         host=host,
                                                         port=port,
                                                         url_path=url_path.lstrip('/'))
    log.info('GET %s', url)
    try:
        # timeout here isn't total timeout, it's response time
        req = requests.get(url, timeout=self.request_timeout)
    except requests.exceptions.RequestException:
        return False
    except IOError:
        return False
    log.debug("%s - response: %s %s", url, req.status_code, req.reason)
    log.debug("%s - content:\n%s\n%s\n%s", url, '='*80, req.content.strip(), '='*80)
    if req.status_code != 200:
        return None
    if self.regex:
        log.info('%s - checking regex against content', url)
        if self.regex.search(req.content):
            log.info('%s - regex matched http output', url)
        else:
            log.info('%s - regex did not match http output', url)
            return None
    log.info("%s - passed all checks", url)
    return (host, port)
def check_git_tags_dockerfiles(self, target):
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    log.debug("finding tags for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    tags = [str(x).split("/")[-1] for x in repo.tags]
    if self.tag_prefix is not None:
        log.debug("restricting to tags matching tag prefix")
        tags = [x for x in tags if self.tag_prefix.match(x)]
    # if log.isEnabledFor(logging.DEBUG):
    log.debug("\n\ntags for target %s:\n\n%s\n", target, "\n".join(tags))
    original_checkout = "master"
    try:
        try:
            original_checkout = repo.active_branch.name
        except TypeError as _:
            pass
        for tag in tags:
            log.debug("checking tag '%s' Dockerfiles for target '%s'", tag, target)
            try:
                repo.git.checkout(tag)
            except git.exc.GitCommandError as _:
                die(_)
            self.check_path(target, tag)
    except Exception as _:  # pylint: disable=broad-except
        die(_)
    finally:
        log.debug("returning to original checkout '%s'", original_checkout)
        repo.git.checkout(original_checkout)
def __init__(self, arg, **kwargs):
    # min=None, max=None, positive=True, integer=True, simple='upper', name='', **kwargs):
    self.name = kwargs.get("name", "")
    if self.name:
        self.name += " "
    self.thresholds = {"upper": None, "lower": None}
    self.opts = {"invert": False}
    self.opts["simple"] = kwargs.get("simple", "upper")
    self.opts["positive"] = kwargs.get("positive", True)
    self.opts["integer"] = kwargs.get("integer", True)
    self.thresholds["min"] = kwargs.get("min", None)
    self.thresholds["max"] = kwargs.get("max", None)
    log.debug("%sthreshold simple = %s", self.name, self.opts["simple"])
    log.debug("%sthreshold positive = %s", self.name, self.opts["positive"])
    log.debug("%sthreshold integer = %s", self.name, self.opts["integer"])
    log.debug("%sthreshold min = %s", self.name, self.thresholds["min"])
    log.debug("%sthreshold max = %s", self.name, self.thresholds["max"])
    if self.opts["simple"] not in ("upper", "lower"):
        raise CodingError("simple threshold type must be one of: upper, lower")
    if not isBool(self.opts["positive"]):
        raise CodingError("positive option must be set to either True or False")
    if not isBool(self.opts["integer"]):
        raise CodingError("integer option must be set to either True or False")
    self.__parse_threshold__(arg, kwargs.get("optional"))
def check_path(self, path, tag):
    status = True
    if os.path.isfile(path):
        return self.check_file(path, tag)
    elif os.path.isdir(path):
        if os.path.basename(path) == ".git":
            return True
        for item in os.listdir(path):
            subpath = os.path.join(path, item)
            if os.path.islink(subpath):
                subpath = os.path.realpath(subpath)
            if os.path.isdir(subpath):
                tag_base = tag.rsplit("-", 1)[0]
                subpath_base = os.path.basename(subpath)
                # log.debug('tag_base = %s', tag_base)
                # log.debug('subpath_base = %s', subpath_base)
                if subpath_base == tag_base:
                    if not self.check_path(subpath, tag):
                        status = False
            elif os.path.isfile(subpath):
                if not self.check_file(subpath, tag):
                    status = False
            elif not os.path.exists(subpath):
                log.debug("subpath '%s' does not exist in tag '%s', skipping..." % (subpath, tag))
            else:
                die("failed to determine if subpath '%s' is file or directory in tag '%s'" % (subpath, tag))
    elif not os.path.exists(path):
        log.debug("path '%s' does not exist in tag '%s', skipping..." % (path, tag))
    else:
        die("failed to determine if path '%s' is file or directory in tag '%s'" % (path, tag))
    return status
def parse(self, stdout):
    output = [_ for _ in stdout.split('\n') if _]
    if len(output) < 2:
        raise CriticalError("docker image '{repo}' not found! Does not exist or has not been pulled yet?"\
                            .format(repo=self.docker_image))
    name_len = len(self.docker_image)
    if len(output) > 2:
        tags = set([line[name_len:name_len + 10].strip() for line in output[1:]])
        tags = [tag for tag in tags if tag != '<none>']
        tags = sorted(list(tags))
        if log.isEnabledFor(logging.DEBUG):
            for tag in tags:
                log.debug('found tag: %s', tag)
        raise UnknownError('too many results returned - did you forget to suffix a specific :tag to ' + \
                           '--docker-image? (eg. :latest, :1.1). The following tags were found: ' + \
                           ', '.join(tags))
    header_line = output[0]
    docker_image_line = output[1]
    image_header = ' '.join(header_line.split()[2:4])
    log.debug('image header column: %s', image_header)
    if image_header != 'IMAGE ID':
        raise UnknownError("3rd column in header '{0}' is not 'IMAGE ID' as expected, parsing failed!"\
                           .format(image_header))
    self.msg = "docker image '{repo}'".format(repo=self.docker_image)
    self.check_id(docker_image_line)
    self.check_size(docker_image_line)
def check_file(self, filename, tag):
    filename = os.path.abspath(filename)
    if os.path.basename(filename) != "Dockerfile":
        return True
    parent = os.path.basename(os.path.dirname(filename))
    tag_base = tag.rsplit("-", 1)[0]
    if parent.lower() != tag_base.lower():
        log.debug("skipping '{0}' as its parent directory '{1}' doesn't match tag base '{2}'".format(
            filename, parent, tag_base))
        return True
    self.valid_git_tags_msg = "%s => Dockerfile Git Tags OK" % filename
    self.invalid_git_tags_msg = "%s => Dockerfile Git Tags MISMATCH in tag '%s'" % (filename, tag)
    try:
        if not self.check_dockerfile_arg(filename, tag):
            self.failed = True
            # print(self.invalid_git_tags_msg)
            return False
        # now switched to per tag scan this returns way too much redundant output
        # print(self.valid_git_tags_msg)
    except IOError as _:
        die("ERROR: %s" % _)
    return True
def check_media_file(self, filename):
    valid_media_msg = '%s => OK' % filename
    invalid_media_msg = '%s => INVALID' % filename
    try:
        # cmd = self.validate_cmd.format(filename)
        cmd = self.validate_cmd
        log.debug('cmd: %s %s', cmd, filename)
        log.info('verifying {0}'.format(filename))
        # capturing stderr to stdout because ffprobe prints to stderr in all cases
        # Python 2.7+
        #subprocess.check_output(cmd.split() + [filename], stderr=subprocess.STDOUT)
        proc = subprocess.Popen(cmd.split() + [filename], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = proc.communicate()
        returncode = proc.wait()
        if returncode != 0 or (stdout is not None and 'Error' in stdout):
            _ = CalledProcessError(returncode, cmd)
            _.output = stdout
            raise _
        print(valid_media_msg)
    except CalledProcessError as _:
        if self.verbose > 2:
            print(_.output)
        if self.skip_errors:
            print(invalid_media_msg)
            self.failed = True
            return False
        die(invalid_media_msg)
def check_multirecord_json(self):
    log.debug('check_multirecord_json()')
    for line in self.iostream:
        if isJson(line):
            # can't use self.print() here, don't want to print valid for every line of a file / stdin
            if self.passthru:
                print(line, end='')
        elif isJson(line.replace("'", '"')):
            if self.permit_single_quotes:
                log.debug('valid multirecord json (single quoted)')
                # self.single_quotes_detected = True
                if self.passthru:
                    print(line, end='')
            else:
                log.debug('invalid multirecord json (single quoted)')
                self.failed = True
                if not self.passthru:
                    die('%s (multi-record format)' % self.invalid_json_msg_single_quotes)
                return False
        else:
            log.debug('invalid multirecord json')
            self.failed = True
            return False
    # self.multi_record_detected = True
    log.debug('multirecord json (all lines passed)')
    if not self.passthru:
        print('%s (multi-record format)' % self.valid_json_msg)
    return True
def check_git_branches_upstream(self, target):
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [x for x in branches if self.branch_prefix.match(str(x))]
        if not branches:
            log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                     .format(gitroot, branch, tracking_branch))
        else:
            self.status = "ERROR"
            log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                      .format(branch, tracking_branch, expected))
def check(self, client):
    log.info('running Docker info')
    info = client.info()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(info))
    containers = info['Containers']
    running_containers = info['ContainersRunning']
    paused_containers = info['ContainersPaused']
    stopped_containers = info['ContainersStopped']
    self.msg = 'Docker '
    if self.running:
        self.msg += 'running containers = {}'.format(running_containers)
        self.check_thresholds(running_containers)
        self.msg += ' | running_containers={}{}'.format(running_containers, self.get_perf_thresholds())
    elif self.paused:
        self.msg += 'paused containers = {}'.format(paused_containers)
        self.check_thresholds(paused_containers)
        self.msg += ' | paused_containers={}{}'.format(paused_containers, self.get_perf_thresholds())
    elif self.stopped:
        self.msg += 'stopped containers = {}'.format(stopped_containers)
        self.check_thresholds(stopped_containers)
        self.msg += ' | stopped_containers={}{}'.format(stopped_containers, self.get_perf_thresholds())
    elif self.total:
        self.msg += 'total containers = {}'.format(containers)
        self.check_thresholds(containers)
        self.msg += ' | total_containers={}{}'.format(containers, self.get_perf_thresholds())
    else:
        self.msg += 'containers = {}, running containers = {}, paused containers = {}, stopped containers = {}'\
                    .format(containers, running_containers, paused_containers, stopped_containers)
        self.msg += ' | containers={} running_containers={} paused_containers={} stopped_containers={}'\
                    .format(containers, running_containers, paused_containers, stopped_containers)
def check_path(self, path, branch):
    status = True
    (branch_base, _) = self.branch_version(branch)
    if os.path.isfile(path):
        return self.check_file(path, branch)
    elif os.path.isdir(path):
        if os.path.basename(path) == '.git':
            return True
        for item in os.listdir(path):
            subpath = os.path.join(path, item)
            if os.path.islink(subpath):
                subpath = os.path.realpath(subpath)
            if os.path.isdir(subpath):
                subpath_base = os.path.basename(subpath)
                #log.debug('subpath_base = %s', subpath_base)
                if self.normalize_name(subpath_base) == self.normalize_name(branch_base):
                    if not self.check_path(subpath, branch):
                        status = False
            elif os.path.isfile(subpath):
                if not self.check_file(subpath, branch):
                    status = False
            elif not os.path.exists(subpath):
                log.debug("subpath '%s' does not exist in branch '%s', skipping..." % (subpath, branch))
            else:
                die("failed to determine if subpath '%s' is file or directory in branch '%s'" % (subpath, branch))
    elif not os.path.exists(path):
        log.debug("path '%s' does not exist in branch '%s', skipping..." % (path, branch))
    else:
        die("failed to determine if path '%s' is file or directory in branch '%s'" % (path, branch))
    return status
def timeout_max(self, secs):
    if secs is not None and not isInt(secs):
        raise CodingError('invalid timeout max passed to set_timeout_max(), must be an integer representing seconds')  # pylint: disable=line-too-long
    # leave this to be able to set max to any amount
    # validate_int(secs, 'timeout default', 0, self.__timeout_max )
    log.debug('setting max timeout to %s secs', secs)
    self.__timeout_max = secs
def subscribe(self):
    self.consumer = KafkaConsumer(
        #self.topic,
        bootstrap_servers=self.brokers,
        # client_id=self.client_id,
        # group_id=self.group_id,
        request_timeout_ms=self.timeout_ms
    )
    #key_serializer
    #value_serializer
    log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
    # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
    # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
    # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
    log.debug('assigning partition {0} to consumer'.format(self.partition))
    # self.consumer.assign([self.partition])
    self.consumer.assign([self.topic_partition])
    log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
    log.debug('getting current offset')
    self.start_offset = self.consumer.position(self.topic_partition)
    # self.start_offset = 0
    if self.start_offset is None:
        self.start_offset = 0
        #raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
    log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
def check_git_branches_dockerfiles(self, target):
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = [str(x) for x in repo.refs if isinstance(x, git.refs.remote.RemoteReference)]
    branches = [x.split('/')[-1] for x in branches]
    branches = [x for x in branches if x not in ('HEAD', 'master')]
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [x for x in branches if self.branch_prefix.match(x)]
    #if log.isEnabledFor(logging.DEBUG):
    log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(branches))
    original_checkout = 'master'
    try:
        try:
            original_checkout = repo.active_branch.name
        except TypeError as _:
            pass
        for branch in branches:
            log.debug("checking branch '%s' Dockerfiles for target '%s'", branch, target)
            self.branches_checked += 1
            try:
                repo.git.checkout(branch)
            except git.exc.GitCommandError as _:
                die(_)
            self.check_path(target, branch)
    except Exception as _:  # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(1)
    finally:
        log.debug("returning to original checkout '%s'", original_checkout)
        repo.git.checkout(original_checkout)
def run(self):
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        log.debug('getting plugin info')
        #plugins = server.get_plugins()
        # deprecated but .get_plugins() output is not JSON serializable
        # so must use old deprecated method get_plugins_info() :-/
        plugins = server.get_plugins_info()
        query_time = time.time() - start_time
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(plugins))
    plugin_count = len(plugins)
    update_count = 0
    for plugin in plugins:
        if plugin['hasUpdate']:
            update_count += 1
    self.msg += " {0} plugin update{1} available out of {2} installed plugin{3}".format(
        update_count, plural(update_count), plugin_count, plural(plugin_count))
    if update_count:
        self.warning()
    self.msg += ' | updates_available={0};1 plugins_installed={1} query_time={2:.4f}s'.format(
        update_count, plugin_count, query_time)
def main(self):
    try:
        # Python 2.x
        super(NagiosPlugin, self).main()
        # Python 3.x
        # super().__init__()
        # redirect_stderr_stdout()
    except CriticalError as _:
        qquit('CRITICAL', _)
    except WarningError as _:
        qquit('WARNING', _)
    except UnknownError as _:
        qquit('UNKNOWN', _)
    except CodingError as _:
        qquit('UNKNOWN', 'Programming Error: {0}. {1}'.format(_, support_msg()))
    except Exception as _:  # pylint: disable=broad-except
        exception_type = type(_).__name__
        if log.isEnabledFor(logging.DEBUG):
            log.debug("exception: '%s'", exception_type)
            log.debug(traceback.format_exc())
        msg = 'Nagios Plugin Exception: {exception_type}: {msg}'.format(exception_type=exception_type,
                                                                        msg=self.exception_msg())
        #msg = ', '.join([x.strip() for x in msg.split('\n')])
        # ', ' doesn't look nice for ':\n ...' => ':, ...' (snakebite OutOfNNException)
        #msg = '\t'.join([x.strip() for x in msg.split('\n')])
        #if self.options.verbose > 2:
        #    msg = type(_).__name__ + ': ' + msg
        msg += '. ' + support_msg()
        qquit('UNKNOWN', msg)
def subscribe(self):
    self.consumer = KafkaConsumer(
        #self.topic,
        bootstrap_servers=self.brokers,
        # client_id=self.client_id,
        # group_id=self.group_id,
        request_timeout_ms=self.timeout_ms
    )
    #key_serializer
    #value_serializer
    # this is only a guess as Kafka doesn't expose its API version
    #log.debug('kafka api version: %s', self.consumer.config['api_version'])
    log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
    # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
    # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
    # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
    log.debug('assigning partition {0} to consumer'.format(self.partition))
    # self.consumer.assign([self.partition])
    self.consumer.assign([self.topic_partition])
    log.debug('partition assignments: {0}'.format(self.consumer.assignment()))
    log.debug('getting current offset')
    # see also highwater, committed, seek_to_end
    self.start_offset = self.consumer.position(self.topic_partition)
    if self.start_offset is None:
        # don't do this, I've seen a scenario where None is returned and all messages are read again,
        # better to fail
        # log.warn('consumer position returned None, resetting to zero')
        # self.start_offset = 0
        raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
    log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
def run(self):
    server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
    try:
        log.debug('setting up Jenkins connection to %s', server_url)
        start_time = time.time()
        server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('getting user')
            user = server.get_whoami()
            log.debug('connected as user %s', jsonpp(user))
        if self.list_jobs:
            log.debug('getting jobs')
            #jobs = server.get_jobs()
            # recursively get all jobs
            jobs = server.get_all_jobs()
            # more efficient with many folders
            # jobs = server.run_script("""
            #     import groovy.json.JsonBuilder;
            #
            #     // get all projects excluding matrix configuration
            #     // as they are simply part of a matrix project.
            #     // there may be better ways to get just jobs
            #     items = Jenkins.instance.getAllItems(AbstractProject);
            #     items.removeAll {
            #         it instanceof hudson.matrix.MatrixConfiguration
            #     };
            #
            #     def json = new JsonBuilder()
            #     def root = json {
            #         jobs items.collect {
            #             [
            #                 name: it.name,
            #                 url: Jenkins.instance.getRootUrl() + it.getUrl(),
            #                 color: it.getIconColor().toString(),
            #                 fullname: it.getFullName()
            #             ]
            #         }
            #     }
            #
            #     // use json.toPrettyString() if viewing
            #     println json.toString()
            #     """)
            print('Jenkins Jobs:\n')
            for job in jobs:
                print(job['fullname'])
            sys.exit(ERRORS['UNKNOWN'])
        log.debug('checking job exists')
        if server.job_exists(self.job):
            self.msg += 'exists'
        else:
            self.critical()
            self.msg += 'does not exist!'
    except jenkins.JenkinsException as _:
        raise CriticalError(_)
    query_time = time.time() - start_time
    self.msg += ' | query_time={0:.4f}s'.format(query_time)
def process_json(self, content):
    try:
        self.json_data = json.loads(content)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('JSON prettified:\n\n%s\n%s', jsonpp(self.json_data), '='*80)
        return self.parse_json(self.json_data)
    except (KeyError, ValueError) as _:
        #raise UnknownError('{0}: {1}. {2}'.format(type(_).__name__, _, support_msg_api()))
        raise UnknownError('{0}. {1}'.format(self.exception_msg(), support_msg_api()))
def parse_table(table):
    for row in table.findChildren('tr'):
        for col in row.findChildren('td'):
            if 'Regions in Transition for more than ' in col.get_text():
                log.debug('found Regions in Transition for more than ... getting next td')
                next_sibling = col.findNext('td')
                regions_stuck_in_transition = next_sibling.get_text().strip()
                return regions_stuck_in_transition
    return None
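# Hedged usage sketch (not from the original source): parse_table() walks a BeautifulSoup
# table element, so it would typically be fed a table scraped from the HBase Master UI.
# The HTML below is a made-up minimal example just to show the call shape; parse_table is
# assumed to be reachable at module level here (it may be a @staticmethod on the plugin class).
from bs4 import BeautifulSoup

_sample_html = """
<table>
  <tr>
    <td>Regions in Transition for more than 60000 milliseconds</td>
    <td>3</td>
  </tr>
</table>
"""
_soup = BeautifulSoup(_sample_html, 'html.parser')
print(parse_table(_soup.find('table')))  # -> '3'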
def normalize_name(name):
    # allow all -dev dirs to match same branch
    # special case for solr -> solrcloud dirs
    name2 = name
    name2 = re.sub(pattern=r'-dev$', repl='', string=name2)
    name2 = re.sub(pattern=r'cloud$', repl='', string=name2)
    name2 = name2.lower()
    log.debug("normalized name '%s' => '%s'", name, name2)
    return name2
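# Illustrative only (not in the original file): what normalize_name() yields for a few
# directory / branch style names, following the two suffix regexes and the final lower():
for _example in ('solrcloud', 'solrcloud-dev', 'Kafka-dev', 'centos'):
    print(_example, '=>', normalize_name(_example))
# solrcloud     => solr
# solrcloud-dev => solr
# Kafka-dev     => kafka
# centos        => centos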
def check(self, client):
    log.info('running Docker info')
    info = client.info()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(info))
    images = info['Images']
    self.msg = 'Docker images = {}'.format(images)
    self.check_thresholds(images)
    self.msg += ' | docker_images={}{}'.format(images, self.get_perf_thresholds())
def main(self):
    # log.debug('running main()')
    log.setLevel(logging.WARN)
    self.setup()
    try:
        self.add_options()
        self.add_default_opts()
    except InvalidOptionException as _:
        self.usage(_)
    try:
        self.__parse_args__()
        # broken
        # autoflush()
        # too late
        # os.environ['PYTHONUNBUFFERED'] = "anything"
        self.verbose = self.get_opt('verbose')
        if self.is_option_defined('quiet') and self.get_opt('quiet'):
            self.verbose = 0
        elif self.verbose > 2:
            log.setLevel(logging.DEBUG)
        elif self.verbose > 1:
            log.setLevel(logging.INFO)
        elif self.verbose > 0 and self._prog[0:6] != 'check_':
            log.setLevel(logging.WARN)
        if self.options.debug:
            log.setLevel(logging.DEBUG)  # pragma: no cover
            log.debug('enabling debug logging')
            if self.verbose < 3:
                self.verbose = 3
        log.info('Hari Sekhon %s', self.version)
        log.info(self._github_repo)
        log.info('verbose level: %s (%s)', self.verbose, logging.getLevelName(log.getEffectiveLevel()))
        if self.timeout is not None:
            validate_int(self.timeout, 'timeout', 0, self.timeout_max)
            log.debug('setting timeout alarm (%s)', self.timeout)
            signal.signal(signal.SIGALRM, self.timeout_handler)
            signal.alarm(int(self.timeout))
        # if self.options.version:
        #     print(self.version)
        #     sys.exit(ERRORS['UNKNOWN'])
        self.process_options()
        self.process_args()
        try:
            self.run()
        except CriticalError as _:
            qquit('CRITICAL', _)
        except WarningError as _:
            qquit('WARNING', _)
        except UnknownError as _:
            qquit('UNKNOWN', _)
        self.__end__()
    except InvalidOptionException as _:
        self.usage(_)  # pragma: no cover
    except KeyboardInterrupt:
        # log.debug('Caught control-c...')
        print('Caught control-c...')  # pragma: no cover
def get_mounts():
    try:
        with open('/proc/mounts', 'r') as _:
            lines = _.readlines()
            if log.isEnabledFor(logging.DEBUG):
                for line in lines:
                    log.debug('/proc/mounts: %s', line.rstrip('\n'))
            return lines
    except IOError as _:
        raise UnknownError(_)
def process_file(self, filepath):
    log.debug('processing filepath \'%s\'', filepath)
    if filepath == '-':
        filepath = '<STDIN>'
    if filepath == '<STDIN>':
        self.json_to_xml(sys.stdin.read())
    else:
        with open(filepath) as _:
            content = _.read()
            print(self.json_to_xml(content, filepath=filepath))
def save_cluster(self, cluster, path=''):
    # log.debug('save_cluster(%s, %s)' % (cluster, name))
    if not path:
        path = os.path.normpath(os.path.join(self.blueprint_dir, cluster))
    data = self.get_cluster_blueprint(cluster)
    # logged in save()
    # log.info("saving cluster '%s' blueprint to file '%s'" % (cluster, path))
    if log.isEnabledFor(logging.DEBUG):
        log.debug("cluster '%s' blueprint content = '%s'" % (cluster, data))
    self.save(cluster, path, data)
def save_blueprint(self, blueprint, path=''):
    # log.debug('save_blueprint(%s, %s' % (blueprint, name))
    if not path:
        path = os.path.normpath(os.path.join(self.blueprint_dir, blueprint))
    data = self.get_blueprint(blueprint)
    # logged in save()
    # log.info("saving blueprint '%s' to file '%s" % (blueprint, path))
    if log.isEnabledFor(logging.DEBUG):
        log.debug("blueprint '%s' content = '%s'" % (blueprint, data))
    self.save(blueprint, path, data)
def get_failing_job_id_from_build(self, build):
    log.info('getting failed job id for build %s', build['id'])
    if 'jobs' not in build:
        raise UnknownError('no jobs field found in build, {0}'.format(support_msg_api()))
    for _ in build['jobs']:
        _id = _['id']
        url = 'https://api.travis-ci.org/jobs/{id}'.format(id=_id)
        req = self.request_handler.get(url)
        # if this raises ValueError it'll be caught by run handler
        job = json.loads(req.content)
        if log.isEnabledFor(logging.DEBUG):
            log.debug("job id %s status:\n%s", _id, jsonpp(job))
        if job['state'] == 'finished' and job['status'] in (None, '1'):
            return _id
    raise UnknownError('no failed job found in build {0}'.format(build['id']))
def run(self):
    url = 'https://api.travis-ci.org/repos/{repo}/builds'.format(repo=self.repo)
    request_handler = RequestHandler()
    req = request_handler.get(url)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("\n%s", jsonpp(req.content))
    try:
        self.parse_results(req.content)
    except (KeyError, ValueError):
        exception = traceback.format_exc().split('\n')[-2]
        # this covers up the traceback info and makes it harder to debug
        #raise UnknownError('failed to parse expected json response from Travis CI API: {0}'.format(exception))
        qquit('UNKNOWN', 'failed to parse expected json response from Travis CI API: {0}. {1}'
              .format(exception, support_msg_api()))
def is_file_dup_by_regex(self, filepath):
    #match = re.search(self.regex, filepath)
    basename = os.path.basename(filepath)
    match = re.search(self.regex, basename)
    if match:
        log.debug("regex matched file '%s'", filepath)
        if match.groups():
            capture = match.group(1)
        else:
            capture = match.group(0)
        if capture in self.regex_captures:
            self.dups_by_regex[capture] = self.dups_by_regex.get(capture, set())
            self.dups_by_regex[capture].add(self.regex_captures[capture])
            self.dups_by_regex[capture].add(filepath)
            return True
        self.regex_captures[capture] = filepath
    return False
def check_table_regions(self):
    log.info('checking regions for table \'%s\'', self.table)
    regions = None
    try:
        table = self.conn.table(self.table)
        regions = table.regions()
    except HBaseIOError as _:
        #if 'org.apache.hadoop.hbase.TableNotFoundException' in _.message:
        if 'TableNotFoundException' in _.message:
            qquit('CRITICAL', 'table \'{0}\' does not exist'.format(self.table))
        else:
            qquit('CRITICAL', _)
    except (socket.timeout, ThriftException) as _:
        qquit('CRITICAL', _)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('%s', jsonpp(regions))
    if not regions:
        qquit('CRITICAL', 'failed to get regions for table \'{0}\''.format(self.table))
    if not isList(regions):
        qquit('UNKNOWN', 'region info returned is not a list! ' + support_msg_api())
    num_regions = len(regions)
    log.info('num regions: %s', num_regions)
    self.msg = 'HBase table \'{0}\' has {1} region{2}'.format(self.table, num_regions, plural(num_regions))
    self.check_thresholds(num_regions)
    num_unassigned_regions = 0
    for region in regions:
        try:
            if not region['server_name']:
                #log.debug('region \'%s\' is not assigned to any server', region['name'])
                num_unassigned_regions += 1
        except KeyError as _:
            qquit('UNKNOWN', 'failed to find server assigned to region. ' + support_msg_api())
    log.info('num unassigned regions: %s', num_unassigned_regions)
    self.msg += ', {0} unassigned region{1}'.format(num_unassigned_regions, plural(num_unassigned_regions))
    if num_unassigned_regions > 0:
        self.warning()
        self.msg += '!'
    self.msg += ' |'
    self.msg += ' num_regions={0}'.format(num_regions) + self.get_perf_thresholds(boundary='lower')
    self.msg += ' num_unassigned_regions={0};1;0'.format(num_unassigned_regions)
    log.info('finished, closing connection')
    self.conn.close()
def process_options(self):
    self.no_args()
    self.host = self.get_opt('host')
    self.port = self.get_opt('port')
    validate_host(self.host)
    validate_port(self.port)
    if self.auth and self.get_opt('kerberos'):
        self.auth = 'kerberos'
    if self.auth:
        self.user = self.get_opt('user')
        self.password = self.get_opt('password')
        if self.auth == 'optional':
            if self.user and self.password:
                validate_user(self.user)
                validate_password(self.password)
        elif self.auth == 'kerberos':
            if os.getenv('KRB5_CLIENT_KTNAME'):
                log.debug('kerberos enabled, will try to use keytab at %s', os.getenv('KRB5_CLIENT_KTNAME'))
                # if using KRB5_CLIENT_KTNAME to kinit avoid clobbering the same TGT cache /tmp/krb5cc_{uid}
                # as that may be used by different programs kinit'd different keytabs
                os.environ['KRB5CCNAME'] = '/tmp/krb5cc_{euid}_{basename}'.format(euid=os.geteuid(),
                                                                                  basename=prog)
        else:
            validate_user(self.user)
            validate_password(self.password)
    ssl_noverify = self.get_opt('ssl_noverify')
    if ssl_noverify:
        log_option('ssl no verify', 'true')
        ssl = 1
        os.environ['SSL_NO_VERIFY'] = '1'
        # doesn't work, probably too late after instantiation
        #if not os.getenv('PYTHONWARNINGS'):
        #    os.environ['PYTHONWARNINGS'] = 'ignore:Unverified HTTPS request'
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    else:
        ssl = self.get_opt('ssl')
        log_option('ssl', ssl)
    if ssl and self.protocol == 'http':
        self.protocol = 'https'
    if self.json:
        # recommended for many systems like CouchDB
        # but breaks Ambari API calls
        #self.headers['Accept'] = 'application/json'
        self.headers['Content-Type'] = 'application/json'
def run(self):
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    slave = self.get_opt('slave')
    list_slaves = self.get_opt('list_slaves')
    validate_host(host)
    validate_port(port)
    if not list_slaves:
        validate_host(slave, 'slave')
    url = 'http://%(host)s:%(port)s/master/slaves' % locals()
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n{0}\n{1}\n{2}".format('=' * 80, req.content.strip(), '=' * 80))
    if req.status_code != 200:
        qquit('CRITICAL', "Non-200 response! %s %s" % (req.status_code, req.reason))
    content = req.content
    if not isJson(content):
        qquit('UNKNOWN', 'invalid JSON returned by Mesos Master')
    data = json.loads(content)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('\n%s', jsonpp(data))
    slaves = {}
    regex = re.compile(r'^slave\(\d+\)\@(.+):\d+')
    try:
        for item in data['slaves']:
            match = regex.match(item['pid'])
            if match:
                slaves[item['hostname']] = match.group(1)
            else:
                slaves[item['hostname']] = item['pid']
    except KeyError:
        qquit('UNKNOWN', 'failed to parse slaves from Mesos API output. {0}'.format(support_msg_api()))
    if list_slaves:
        qquit('UNKNOWN', 'Slaves list:\n\n{0}'.format(dict_lines(slaves)))
    log.info('found slaves:\n\n{0}\n'.format(dict_lines(slaves)))
    slave = slave.lower()
    for _ in slaves:
        if slave == _.lower() or slave == slaves[_].lower():
            qquit('OK', "Mesos slave '{0}' registered with master".format(slave))
            break
    else:
        qquit('CRITICAL', "Mesos slave '{0}' not registered with master".format(slave))
def process_csv(self, filehandle):
    csvreader = None
    try:
        if self.delimiter is not None:
            try:
                csvreader = csv.reader(filehandle, delimiter=self.delimiter, quotechar=self.quotechar)
            except TypeError as _:
                self.usage(_)
        else:
            # dialect = csv.excel
            dialect = csv.Sniffer().sniff(filehandle.read(1024))  # this will raise an Error if invalid
            dialect.strict = True
            filehandle.seek(0)
            csvreader = csv.reader(filehandle, dialect)
    except csv.Error as _:
        log.warning('file %s: %s', self.filename, _)
        return False
    count = 0
    try:
        # csvreader doesn't seem to generate any errors ever :-(
        # csv module allows entire lines of json/xml/yaml to go in as a single field
        # Adding some invalidations manually
        for field_list in csvreader:
            # list of fields with no separator information
            # log.debug("line: %s", _)
            # make it fail if there is only a single field on any line
            if len(field_list) < 2:
                return False
            # it's letting JSON through :-/
            if field_list[0] == '{':
                return False
            # extra protection along the same lines as anti-json:
            # the first char of field should be alphanumeric, not syntax
            # however instead of isAlnum allow quotes for quoted CSVs to pass validation
            if not isChars(field_list[0][0], 'A-Za-z0-9\'"'):
                return False
            count += 1
    except csv.Error as _:
        log.warning('file %s, line %s: %s', self.filename, csvreader.line_num, _)
        return False
    if count == 0:
        log.debug('zero lines detected, blank input is not valid CSV')
        return False
    log.debug('%s CSV lines passed', count)
    return True
def get_latest_failed_build(self):
    log.info('getting latest failed build')
    # gets 404 unless replacing the slash
    url = 'https://api.travis-ci.org/repo/{repo}/builds'.format(repo=self.repo.replace('/', '%2F'))
    # request returns blank without authorization header
    req = self.request_handler.get(url, headers=self.headers)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("\n%s", jsonpp(req.content))
    try:
        latest_build = self.parse_latest_failed_build(req.content)
    except (KeyError, ValueError):
        exception = traceback.format_exc().split('\n')[-2]
        # this covers up the traceback info and makes it harder to debug
        #raise UnknownError('failed to parse expected json response from Travis CI API: {0}'.format(exception))
        qquit('UNKNOWN', 'failed to parse expected json response from Travis CI API: {0}. {1}'
              .format(exception, support_msg_api()))
    return latest_build
def req(self, method, url, *args, **kwargs):
    if '://' not in url:
        url = 'http://' + url
    self.url = url
    log.debug('%s %s', str(method).upper(), url)
    req = None
    if 'headers' in kwargs:
        kwargs['headers']['User-Agent'] = get_topfile()
    else:
        kwargs['headers'] = {'User-Agent': get_topfile()}
    try:
        req = getattr(requests, method)(url, *args, **kwargs)
    except requests.exceptions.RequestException as _:
        self.exception_handler(_)
    self.log_output(req)
    self.process_req(req)
    return req
def check_file(self, filename):
    self.filename = filename
    if self.filename == '-':
        self.filename = '<STDIN>'
    self.valid_ini_msg = '%s => INI OK' % self.filename
    self.invalid_ini_msg = '%s => INI INVALID' % self.filename
    if self.filename == '<STDIN>':
        log.debug('ini stdin')
        # TODO: should technically write to temp file to be able to seek(0) for print mode
        self.check_ini(sys.stdin)
    else:
        log.debug('checking %s', self.filename)
        try:
            with open(self.filename) as iostream:
                self.check_ini(iostream)
        except IOError as _:
            die("ERROR: %s" % _)
def run(self):
    if not self.args:
        self.usage('no Dockerfile / directory args given')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_dockerfiles(arg)
    log.info('Total Branches: %s', len(self.branches))
    log.info('Selected Branches: %s', len(self.selected_branches))
    log.info('Branches checked: %s', self.branches_checked)
    log.info('Branches with Dockerfile checked: %s', len(self.branches_dockerfile_checked))
    branches_skipped = len(self.branches_skipped)
    if branches_skipped > 0:
        log.warn('{0} branches skipped for not matching expected naming format'
                 .format(branches_skipped))
    branches_not_checked = len(self.selected_branches) - len(self.branches_dockerfile_checked)
    if branches_not_checked > 1:
        log.warn('{0} branches not checked (no matching Dockerfile found?)'.format(branches_not_checked))
        if log.isEnabledFor(logging.DEBUG):
            log.debug('Branches with no corresponding Dockerfile found:\n%s',
                      '\n'.join(set(self.selected_branches) - set(self.branches_dockerfile_checked)))
    log.info('{0} Dockerfiles checked'.format(len(self.dockerfiles_checked)))
    branches_failed = len(self.branches_failed)
    _ = '{0} Dockerfiles failed validation across {1} branches'.format(self.dockerfiles_failed, branches_failed)
    if branches_failed > 0:
        log.error(_)
    else:
        log.info(_)
    if self.failed:
        log.error('Dockerfile validation FAILED')
        sys.exit(ERRORS['CRITICAL'])
    log.info('Dockerfile validation SUCCEEDED')
def process_database(self, database, table_regex):
    tables = []
    table_count = 0
    log.info("querying tables for database '%s'", database)
    conn = self.connect(database)
    with conn.cursor() as table_cursor:
        try:
            # doesn't support parameterized query quoting from dbapi spec
            #table_cursor.execute('use %(database)s', {'database': database})
            table_cursor.execute('use `{}`'.format(database))
            table_cursor.execute('show tables')
        except impala.error.HiveServer2Error as _:
            log.error("error querying tables for database '%s': %s", database, _)
            if 'AuthorizationException' in str(_):
                return
            raise
        for table_row in table_cursor:
            table = table_row[0]
            table_count += 1
            if not table_regex.search(table):
                log.debug("skipping database '%s' table '%s', does not match regex '%s'", \
                          database, table, self.table)
                continue
            tables.append(table)
    log.info("%s/%s tables selected for database '%s'", len(tables), table_count, database)
    for table in tables:
        try:
            query = self.query.format(db='`{}`'.format(database), table='`{}`'.format(table))
        except KeyError as _:
            if _ == 'db':
                query = self.query.format(table='`{}`'.format(table))
            else:
                raise
        try:
            self.execute(conn, database, table, query)
            self.table_count += 1
        except Exception as _:
            if self.ignore_errors:
                log.error("database '%s' table '%s': %s", database, table, _)
                continue
            raise
def list_keys(self, service_account_email):
    log.debug("getting keys for service account '%s'", service_account_email)
    keys = self.service.projects()\
                       .serviceAccounts()\
                       .keys()\
                       .list(name='projects/-/serviceAccounts/' + service_account_email).execute()
    for key in keys['keys']:
        if key['keyType'] == 'SYSTEM_MANAGED':
            continue
        _id = key['name'].split('/')[-1]
        created_date = key['validAfterTime']
        expiry_date = key['validBeforeTime']
        created_datetime = datetime.strptime(created_date, "%Y-%m-%dT%H:%M:%SZ")
        expiry_datetime = datetime.strptime(expiry_date, "%Y-%m-%dT%H:%M:%SZ")
        age_timedelta = datetime.utcnow() - created_datetime
        age_days = int(age_timedelta.total_seconds() / 86400)
        expired = False
        if expiry_date == '9999-12-31T23:59:59Z':
            expires_in_days = 'NEVER'
        else:
            expires_in_timedelta = expiry_datetime - datetime.utcnow()
            expires_in_days = int(expires_in_timedelta.total_seconds() / 86400)
            if expires_in_days < 1:
                expired = True
        if self.no_expiry and (expires_in_days != 'NEVER'):
            continue
        if self.expired and expired:
            continue
        if self.expires_within_days is not None and \
           (expires_in_days == 'NEVER' or expires_in_days > self.expires_within_days):
            continue
        print('{id} {created} {expires} {age:4d} {expires_in:5s} {expired} {service_account}'
              .format(id=_id,
                      created=created_date,
                      expires=expiry_date,
                      age=age_days,
                      expires_in=expires_in_days,
                      expired=expired,
                      service_account=service_account_email))
def check_dockerfile_arg(self, filename, branch):
    log.debug('check_dockerfile_arg({0}, {1})'.format(filename, branch))
    branch_base = str(branch).replace('-dev', '')
    (branch_base, branch_versions) = self.branch_version(branch)
    with open(filename) as filehandle:
        version_index = 0
        for line in filehandle:
            #log.debug(line.strip())
            # hack for Scala Java version, hacky but can't think of a better more generic way to do this right now
            match = self.arg_regex.match(line.strip())
            if match:
                arg_var = match.group(1)
                # this is too restrictive and prevents finding a lot of issues with
                # more complex naming conventions for kafka, centos-java/scala etc
                # instead we now expect ARG *_VERSION to be in the same order as the version numbers in branch name
                #log.debug("checking arg group 1 '%s' == branch_base '%s'", argversion.group(1), branch_base)
                #if self.normalize_name(arg_var) == self.normalize_name(branch_base).replace('-', '_'):
                if version_index >= len(branch_versions):
                    return True
                branch_version = branch_versions[version_index]
                found_version = match.group(2)
                if not self.check_version(filename=filename,
                                          branch=branch,
                                          branch_base=branch_base,
                                          arg_var=arg_var,
                                          found_version=found_version,
                                          branch_version=branch_version):
                    return False
                version_index += 1
            elif branch_base == 'scala' \
                 and len(branch_versions) > 1 \
                 and branch_versions[1] is not None:
                #log.debug('special scala condition checking for jdk version')
                match = self.jdk_regex.match(line)
                if match:
                    found_version = match.group(1)
                    #log.debug('found jdk version \'%s\'', found_version)
                    if not self.check_version(filename=filename,
                                              branch=branch,
                                              branch_base=branch_base,
                                              arg_var=None,
                                              found_version=found_version,
                                              branch_version=branch_versions[version_index+1]):
                        return False
    return True
def run(self):
    iam = boto3.client('iam')
    log.info('generating credentials report')
    while True:
        result = iam.generate_credential_report()
        log.debug('%s', result)
        if result['State'] == 'COMPLETE':
            log.info('credentials report generated')
            break
        log.info('waiting for credentials report')
        time.sleep(1)
    try:
        result = iam.get_credential_report()
    except ClientError as _:
        raise
    csv_content = result['Content']
    log.debug('%s', csv_content)
    filehandle = StringIO(unicode(csv_content))
    filehandle.seek(0)
    csvreader = csv.reader(filehandle)
    headers = next(csvreader)
    assert headers[0] == 'user'
    assert headers[4] == 'password_last_used'
    assert headers[10] == 'access_key_1_last_used_date'
    assert headers[15] == 'access_key_2_last_used_date'
    self.now = datetime.utcnow()
    found = False
    for row in csvreader:
        user = row[0]
        if user != self.user:
            continue
        found = True
        last_used_days = self.get_user_last_used_days(row)
    if not found:
        raise UnknownError('AWS user {} not found'.format(self.user))
    if last_used_days <= self.days:
        self.warning()
    if last_used_days == 0:
        self.msg = 'AWS user {} last used within the last day'.format(self.user)
    else:
        self.msg = 'AWS user {} last used {} day{} ago'.format(self.user, last_used_days, plural(last_used_days))
    self.msg += ' | last_used_days={};0;;{}'.format(last_used_days, self.days)
def process_message(self):
    message = self.message
    message = re.sub(r'\s*(?:[\w\s]+?\s)?(?:OK|WARNING|CRITICAL|UNKNOWN)(?:\s[\w\s]+?)?\s*:\s*', '', message, 1, re.I)
    if re.search('^Hari Sekhon', message):
        _ = re.search('^usage:', message, re.M)
        if _:
            log.debug('stripping off my extended plugin description header up to usage: options line ' +
                      'to make it more obvious that a usage error has occurred')
            message = message[_.start():]
    message = message.rstrip('\n')
    message = re.sub(r'\r', '', message)
    message = re.sub(r'\n', r' \\n ', message)
    message = re.sub(r',\s*', '... ', message)
    self.message = message
def get_version(self):
    data = None
    try:
        conn = socket.create_connection((self.host, self.port), timeout=self.timeout/2)
        conn.sendall('envi')
        data = conn.recv(1024)
        conn.close()
    except socket.error as _:
        qquit('CRITICAL', "Failed to connect to ZooKeeper at '{host}:{port}': "\
                          .format(host=self.host, port=self.port) + str(_))
    version = None
    log.debug(data.strip())
    for line in data.split('\n'):
        _ = self.version_line_regex.match(line)
        if _:
            version = _.group(1)
            break
    return version
def run(self):
    log.info('testing AWS API call')
    # there isn't really a .ping() type API endpoint so just connect to IAM and list users
    iam = boto3.client('iam')
    try:
        _ = iam.list_users()
        # just in case we get an iterator, consume it to flush out any error
        collections.deque(_, maxlen=0)
        if log.isEnabledFor(logging.DEBUG):
            log.debug('\n\n%s', _)
            log.debug('\n\n%s', jsonpp(_))
    # pylint: disable=broad-except
    except Exception as _:
        if log.isEnabledFor(logging.DEBUG):
            raise
        else:
            raise CriticalError(_)
    self.msg = 'AWS API credentials OK'
def check_file(self, filename):
    self.filename = filename
    if self.filename == '-':
        self.filename = '<STDIN>'
    self.valid_csv_msg = '%s => CSV OK' % self.filename
    self.invalid_csv_msg = '%s => CSV INVALID' % self.filename
    if self.filename == '<STDIN>':
        log.debug('checking stdin')
        self.check_csv(sys.stdin)
    else:
        if self.is_excluded(filename):
            return
        log.debug('checking %s', self.filename)
        try:
            with open(self.filename) as iostream:
                self.check_csv(iostream)
        except IOError as _:
            die("ERROR: %s" % _)
def process_table(self, table):
    try:
        table_handle = self.conn.table(table)
        regions = table_handle.regions()
        if len(regions) < 1:
            qquit('UNKNOWN', "no regions found for table '{0}'".format(table))
        for region in regions:
            log.debug("table '%s' region '%s'", table, region)
            server = region['server_name']
            self.server_region_counts[server] = self.server_region_counts.get(server, 0)
            self.server_region_counts[server] += 1
    except (socket.timeout, ThriftException, HBaseIOError) as _:
        qquit('CRITICAL', _)
    except KeyError as _:
        qquit('UNKNOWN', 'failed to process region information. ' + support_msg_api())
def parse(json_data):
    try:
        # it's already nicely laid out
        #if log.isEnabledFor(logging.DEBUG):
        #    log.debug('%s', jsonpp(json_data))
        compaction_queue_size = None
        for bean in json_data['beans']:
            if bean['name'] == 'Hadoop:service=HBase,name=RegionServer,sub=Server':
                if log.isEnabledFor(logging.DEBUG):
                    log.debug('found RegionServer section:')
                    log.debug('%s', jsonpp(bean))
                compaction_queue_size = bean['compactionQueueLength']
                if not isInt(compaction_queue_size):
                    qquit('UNKNOWN', 'non-integer returned for compactionQueueLength! ' + support_msg_api())
                return compaction_queue_size
    except KeyError as _:
        qquit('UNKNOWN', str(_) + ': failed to parse HBase Master jmx info. ' + support_msg_api())
    qquit('UNKNOWN', 'RegionServer mbean not found, double check this is pointing to an HBase RegionServer')
def clean_detail(self):
    detail = self.detail
    detail = re.sub(r'\s*(?:[\w\s]+?\s)?(?:OK|WARNING|CRITICAL|UNKNOWN)(?:\s[\w\s]+?)?\s*:\s*', '', detail, 1, re.I)
    if re.search('^Hari Sekhon', detail):
        _ = re.search('^usage:', detail, re.M)
        if _:
            log.debug('stripping off my extended plugin description header up to usage: options line ' +
                      'to make it more obvious that a usage error has occurred')
            detail = detail[_.start():]
    detail = detail.rstrip('\n')
    detail = re.sub(r'\r', '', detail)
    detail = re.sub(r'\n', r' \\n ', detail)
    detail = re.sub(r',\s*', '... ', detail)
    self.detail = detail
def get_peers(content):
    json_data = None
    try:
        json_data = json.loads(content)
    except ValueError:
        raise UnknownError("non-json data returned by consul: '%s'. %s" % (content, support_msg_api()))
    if not json_data:
        raise CriticalError('no peers found, recently started?')
    #if not json_data:
    #    raise UnknownError("blank list returned by consul! '%s'. %s" % (content, support_msg_api()))
    if not isList(json_data):
        raise UnknownError("non-list returned by consul: '%s'. %s" % (content, support_msg_api()))
    for peer in json_data:
        log.debug('peer: {0}'.format(peer))
    peers = uniq_list(json_data)
    return peers
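# Hedged example (not part of the original source): get_peers() expects the raw response body
# from Consul's status/peers endpoint, which returns a JSON list of "ip:port" strings.
_sample_body = '["10.0.0.1:8300", "10.0.0.2:8300", "10.0.0.1:8300"]'
print(get_peers(_sample_body))  # duplicates collapsed by uniq_list(); ordering may vary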
def check_table(self):
    log.info('checking table \'%s\'', self.table)
    if not self.conn.is_table_enabled(self.table):
        qquit('CRITICAL', "table '{0}' is disabled!".format(self.table))
    table_conn = self.conn.table(self.table)
    families = table_conn.families()
    self.num_column_families = len(families)
    log.info('found %s column families: %s', self.num_column_families, families)
    regions = table_conn.regions()
    self.num_regions = len(regions)
    log.info('found %s regions', self.num_regions)
    if log.isEnabledFor(logging.DEBUG):
        #log.debug('regions list:\n%s', '\n'.join([_['name'] for _ in regions]))
        log.debug('regions list: \n%s', '\n'.join([str(_) for _ in regions]))
    for column_family in sorted(families):
        column = '{0}:{1}'.format(column_family, self.column_qualifier)
        for region in regions:
            self.check_region(table_conn, column, region)
def check_table(self):
    log.info('checking table \'%s\'', self.table)
    is_enabled = None
    families = None
    try:
        is_enabled = self.conn.is_table_enabled(self.table)
        log.info('enabled: %s', is_enabled)
        table = self.conn.table(self.table)
        families = table.families()
    except HBaseIOError as _:
        #if 'org.apache.hadoop.hbase.TableNotFoundException' in _.message:
        if 'TableNotFoundException' in _.message:
            qquit('CRITICAL', 'table \'{0}\' does not exist'.format(self.table))
        else:
            qquit('CRITICAL', _)
    except (socket.error, socket.timeout, ThriftException) as _:
        qquit('CRITICAL', _)
    if log.isEnabledFor(logging.DEBUG):
        log.debug('column families:\n%s', jsonpp(families))
    if not families:
        qquit('CRITICAL', 'failed to get column families for table \'{0}\''.format(self.table))
    if not isDict(families):
        qquit('UNKNOWN', 'column family info returned was not a dictionary! ' + support_msg_api())
    num_families = len(families)
    log.info('num families: %s', num_families)
    self.msg = 'HBase table \'{0}\' is '.format(self.table)
    if is_enabled:
        self.msg += 'enabled, '
    else:
        self.critical()
        self.msg += 'disabled! '
    self.msg += '{0} column '.format(num_families)
    if num_families == 1:
        self.msg += 'family'
    else:
        self.msg += 'families'
    self.check_thresholds(num_families)
    self.msg += ' | num_column_families={0}'.format(num_families) + self.get_perf_thresholds(boundary='lower')
    log.info('finished, closing connection')
    self.conn.close()
def check_git_branches_dockerfiles(self, target):
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    #branches = [str(x) for x in repo.refs if isinstance(x, git.refs.remote.RemoteReference)]
    branches = [str(x) for x in repo.refs if isinstance(x, git.Head)]
    branches = [x.split('/')[-1] for x in branches]
    branches = set(branches)
    branches = [x for x in branches if x not in ('HEAD', 'master')]
    self.branches = branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [x for x in branches if self.branch_prefix.match(x)]
    self.selected_branches = branches
    #if log.isEnabledFor(logging.DEBUG):
    log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(branches))
    # in Travis CI there is no original branch and master branch does not exist, so falling back to assuming
    # master causes failure, better to not check out original branch if you don't know
    #original_branch = 'master'
    original_branch = None
    try:
        try:
            original_branch = repo.active_branch.name
        except TypeError as _:
            pass
        for branch in branches:
            log.debug("checking branch '%s' Dockerfiles for target '%s'", branch, target)
            self.branches_checked += 1
            try:
                repo.git.checkout(branch)
            except git.GitError as _:
                die(_)
            self.check_path(target, branch)
    except Exception as _:  # pylint: disable=broad-except
        traceback.print_exc()
        sys.exit(1)
    finally:
        if original_branch is not None:
            log.debug("checking out original branch '%s'", original_branch)
            repo.git.checkout(original_branch)
def check(self, client):
    log.info('getting Docker version')
    _ = client.version()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(_))
    version = _['Version']
    if not isVersionLax(version):
        raise UnknownError('Docker version unrecognized \'{}\'. {}'\
                           .format(version, support_msg_api()))
    self.msg = 'Docker version = {}'.format(version)
    if self.expected is not None:
        log.info("verifying version against expected regex '%s'", self.expected)
        if re.match(self.expected, str(version)):
            log.info('version regex matches retrieved version')
        else:
            log.info('version regex does not match retrieved version')
            self.msg += " (expected '{}')".format(self.expected)
            self.critical()
    self.msg += ', API version = {}'.format(_['ApiVersion'])
def run_host(self, host, url):
    log.info('querying %s', host)
    req = RequestHandler().get(url)
    json_data = json.loads(req.text)
    uptime = None
    beans = json_data['beans']
    if self.since_uptime:
        for bean in beans:
            if bean['name'] == 'java.lang:type=Runtime':
                uptime = int(bean['Uptime'] / 1000)
                break
        if not uptime:
            raise UnknownError("failed to find uptime in JMX stats for host '{}'. {}"\
                               .format(host, support_msg_api()))
    for bean in beans:
        log.debug('processing Regions bean')
        if bean['name'] == 'Hadoop:service=HBase,name=RegionServer,sub=Regions':
            self.process_bean(host, bean, uptime)
            self.print_stats(host)
def fetch(self, url_suffix):
    err = ''
    try:
        response = self.get(url_suffix)
    except requests.exceptions.RequestException as _:
        err = "failed to fetch Ambari Blueprint from '%s': %s" % (self.url, _)
        # log.critical(err)
        qquit('CRITICAL', err)
    json_data = json.loads(response)
    if log.isEnabledFor(logging.DEBUG):
        log.debug("blueprint = " + jsonpp(json_data))
    try:
        del json_data['href']
        log.debug("stripped href as it's not valid if re-submitting the blueprint to Ambari")
    except KeyError as _:
        pass
    # Ambari 2.1.3 supports this according to:
    # https://cwiki.apache.org/confluence/display/AMBARI/Blueprints#Blueprints-ClusterCreationTemplateStructure
    # json_data['config_recommendation_strategy'] = 'NEVER_APPLY' # default
    # json_data['config_recommendation_strategy'] = 'ONLY_STACK_DEFAULTS_APPLY'
    # json_data['config_recommendation_strategy'] = 'ALWAYS_APPLY'
    if self.strip_config:
        log.info('stripping out config sections of blueprints to make more generic')
        try:
            del json_data['configurations']
            for hostgroup in json_data['host_groups']:
                del hostgroup['configurations']
        except KeyError as _:
            pass
    try:
        json_data['host_groups'] = list_sort_dicts_by_value(json_data['host_groups'], 'name')
        for hostgroup in json_data['host_groups']:
            hostgroup['components'] = list_sort_dicts_by_value(hostgroup['components'], 'name')
    except KeyError as _:
        qquit('CRITICAL', 'failed to sort blueprint: %s' % _)
    return jsonpp(json_data)
def run(self):  # pylint: disable=no-self-use
    linux_only()
    regex = re.compile(r'^HugePages_Total:\s+(\d+)\s*$')
    hugepages_total = None
    with open('/proc/meminfo') as meminfo:
        for line in meminfo:
            if 'HugePage' in line:
                log.debug(line)
                match = regex.match(line)
                if match:
                    hugepages_total = int(match.group(1))  # protected by regex
                    break
    if hugepages_total is None:
        raise UnknownError('HugePages Total not found in /proc/meminfo. {}'.format(support_msg()))
    if hugepages_total == 0:
        self.msg += 'disabled'
    else:
        raise CriticalError(' Huge Pages = enabled. This should be disabled for Big Data ' +
                            'systems such as Hadoop / MongoDB for performance reasons etc...')
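# For reference (typical /proc/meminfo formatting, not taken from the source above): the regex
# matches a line of the form
#
#   HugePages_Total:       0
#
# so an equivalent quick manual check on the command line would be:
#   grep HugePages_Total /proc/meminfo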