def check(self, client):
    log.info('running Docker info')
    info = client.info()
    if log.isEnabledFor(logging.DEBUG):
        log.debug(jsonpp(info))
    containers = info['Containers']
    running_containers = info['ContainersRunning']
    paused_containers = info['ContainersPaused']
    stopped_containers = info['ContainersStopped']
    self.msg = 'Docker '
    if self.running:
        self.msg += 'running containers = {}'.format(running_containers)
        self.check_thresholds(running_containers)
        self.msg += ' | running_containers={}{}'.format(running_containers, self.get_perf_thresholds())
    elif self.paused:
        self.msg += 'paused containers = {}'.format(paused_containers)
        self.check_thresholds(paused_containers)
        self.msg += ' | paused_containers={}{}'.format(paused_containers, self.get_perf_thresholds())
    elif self.stopped:
        self.msg += 'stopped containers = {}'.format(stopped_containers)
        self.check_thresholds(stopped_containers)
        self.msg += ' | stopped_containers={}{}'.format(stopped_containers, self.get_perf_thresholds())
    elif self.total:
        self.msg += 'total containers = {}'.format(containers)
        self.check_thresholds(containers)
        self.msg += ' | total_containers={}{}'.format(containers, self.get_perf_thresholds())
    else:
        self.msg += 'containers = {}, running containers = {}, paused containers = {}, stopped containers = {}'\
                    .format(containers, running_containers, paused_containers, stopped_containers)
        self.msg += ' | containers={} running_containers={} paused_containers={} stopped_containers={}'\
                    .format(containers, running_containers, paused_containers, stopped_containers)
def connection(self, host, port, user, password, ssl=False, **kwargs):
    # must set X-Requested-By in newer versions of Ambari
    self.x_requested_by = user
    if user == 'admin':
        self.x_requested_by = os.getenv('USER', user)
    #log.info("contacting Ambari as '%s'" % self.user)
    if not isHost(host) or not isPort(port) or not isUser(user) or not password:
        raise InvalidOptionException('invalid options passed to AmbariBlueprint()')
    proto = 'http'  # pylint: disable=unused-variable
    if ssl:
        proto = 'https'
    self.host = host
    self.port = port
    self.user = user
    self.password = password
    # if kwargs.has_key('strip_config') and kwargs['strip_config']:
    if 'strip_config' in kwargs and kwargs['strip_config']:
        self.strip_config = True
    self.url_base = '%(proto)s://%(host)s:%(port)s/api/v1' % locals()
    if 'dir' in kwargs and kwargs['dir']:
        self.blueprint_dir = kwargs['dir']
    if not isDirname(self.blueprint_dir):
        qquit('UNKNOWN', 'invalid dir arg passed to AmbariBlueprintTool')
    try:
        if not self.blueprint_dir or not os.path.exists(self.blueprint_dir):
            log.info("creating blueprint data dir '%s'" % self.blueprint_dir)
            os.mkdir(self.blueprint_dir)
        if not os.path.isdir(self.blueprint_dir):
            raise IOError("blueprint dir '%s' already taken and is not a directory" % self.blueprint_dir)
    except IOError as _:
        die("failed to create dir '%s': %s" % (self.blueprint_dir, _))
def output(self, connect_time, total_time):
    precision = self.precision
    cell_info = "HBase table '{0}' row '{1}' column '{2}'".format(self.table, self.row, self.column)
    value = self.value
    self.msg = "cell value = '{0}'".format(value)
    if isFloat(value):
        log.info('value is float, checking thresholds')
        self.check_thresholds(value)
    self.msg += " for {0}".format(cell_info)
    query_time = self.timings[self.column]['read']
    perfdata = ''
    perfdata += ' total_time={0:0.{precision}f}ms'.format(total_time, precision=precision)
    perfdata += ' connect_time={0:0.{precision}f}ms'.format(connect_time, precision=precision)
    perfdata += ' query_time={0:0.{precision}f}ms'.format(query_time, precision=precision)
    # show the timings at the end of the user output as well as in the graphing perfdata section
    self.msg += ',' + perfdata
    self.msg += ' |'
    if self.graph:
        if isFloat(value):
            self.msg += ' value={0}'.format(value)
            if self.units:
                self.msg += str(self.units)
            self.msg += self.get_perf_thresholds()
        else:
            self.msg += ' value=NaN'
    self.msg += perfdata
def parse_json(self, json_data):
    log.info('parsing response')
    try:
        bean = json_data['beans'][0]
        space_used_pc = bean['PercentUsed']
        # the way below is more informative
        #assert type(space_used_pc) == float
        if re.search(r'e-\d+$', str(space_used_pc)):
            space_used_pc = 0
        if not isFloat(space_used_pc):
            raise UnknownError("non-float returned for PercentUsed by namenode '{0}:{1}'"\
                               .format(self.host, self.port))
        assert space_used_pc >= 0
        stats = {}
        for stat in ('Total', 'TotalBlocks', 'TotalFiles', 'Used'):
            stats[stat] = bean[stat]
            if not isInt(stats[stat]):
                raise UnknownError("non-integer returned for {0} by namenode '{1}:{2}'"\
                                   .format(stat, self.host, self.port))
            stats[stat] = int(stats[stat])
        self.ok()
        self.msg = 'HDFS space used = {0:.2f}% ({1}/{2})'\
                   .format(space_used_pc, humanize.naturalsize(stats['Used']), humanize.naturalsize(stats['Total']))
        self.check_thresholds(space_used_pc)
        self.msg += ", in {0:d} files spread across {1:d} blocks".format(stats['TotalFiles'], stats['TotalBlocks'])
        self.msg += " | 'HDFS % space used'={0:f}%{1}".format(space_used_pc, self.get_perf_thresholds())
        self.msg += " 'HDFS space used'={0:d}b".format(stats['Used'])
        self.msg += " 'HDFS file count'={0:d}".format(stats['TotalFiles'])
        self.msg += " 'HDFS block count'={0:d}".format(stats['TotalBlocks'])
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned by namenode '{0}:{1}': {2}"\
                           .format(self.host, self.port, _))
def check_app_elapsed_times(self, app_list):
    num_apps_breaching_sla = 0
    max_elapsed = 0
    matching_apps = 0
    max_threshold_msg = ''
    # save msg as check_thresholds appends to it which we want to reset in this case
    msg = self.msg
    for app in app_list:
        if not self.app_selector(app):
            continue
        name = app['name']
        matching_apps += 1
        elapsed_time = app['elapsedTime']
        assert isInt(elapsed_time)
        elapsed_time = int(elapsed_time / 1000)
        threshold_msg = self.check_thresholds(elapsed_time)
        if threshold_msg:
            num_apps_breaching_sla += 1
            log.info("app '%s' is breaching SLA", name)
        if elapsed_time > max_elapsed:
            max_elapsed = elapsed_time
            max_threshold_msg = threshold_msg
    if max_threshold_msg:
        max_threshold_msg = ' ' + max_threshold_msg
    # restore msg prefix as check_thresholds appends every threshold breach
    self.msg = msg
    return (num_apps_breaching_sla, matching_apps, max_elapsed, max_threshold_msg)
def get_tables(self):
    log.info('getting table list')
    try:
        process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = process.communicate('list')
        process.wait()
        if process.returncode != 0:
            print('ERROR:', end='')
            die(stdout)
        lines = stdout.split('\n')
        lineno = 1
        for line in lines:
            if self.table_list_header_regex.search(line):
                break
            lineno += 1
        if lineno > len(lines):
            die("Failed to parse table list output (couldn't find the starting line TABLE)")
        tables = set()
        for line in lines[lineno:]:
            if self.table_list_end_regex.search(line):
                break
            line = line.strip()
            if not line:
                continue
            tables.add(line)
        return tables
    except OSError as _:
        die("OSError running hbase shell to list tables: {0}".format(_))
    except subprocess.CalledProcessError as _:
        print('Failed to get tables using HBase shell:\n')
        print(_.output)
        sys.exit(_.returncode)
def run(self):
    tables = self.get_tables()
    if not tables:
        die('No Tables Found')
    if self.get_opt('list_tables'):
        print('Tables:\n\n' + '\n'.join(tables))
        sys.exit(3)
    tables_to_flush = set()
    if self.table_regex:
        log.info('filtering tables based on regex')
        for table in sorted(list(tables)):
            if self.table_regex.search(table):
                tables_to_flush.add(table)
    else:
        tables_to_flush = sorted(list(tables))
    if log.isEnabledFor(logging.INFO):
        log.info('Flushing tables:\n\n%s\n', '\n'.join(tables_to_flush))
    flush_commands = '\n'.join(["flush '{0}'".format(table) for table in tables_to_flush])
    try:
        # by having stdout and stderr go to the same place more likely the output will be in a sane order
        process = subprocess.Popen(['hbase', 'shell'], stdin=PIPE, stdout=PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = process.communicate(input=flush_commands)
        process.wait()
        if process.returncode != 0:
            print('ERROR:', end='')
            die(stdout)
        print(stdout)
    except OSError as _:
        die("OSError running hbase shell to flush tables: {0}".format(_))
    except subprocess.CalledProcessError as _:
        print('Failed to get tables using HBase shell:\n')
        print(_.output)
        sys.exit(_.returncode)
def run(self):
    expected = self.get_opt("expected")
    if expected is not None:
        validate_regex(expected)
        log.info("expected version regex: %s", expected)
    cmd = "consul version"
    log.debug("cmd: " + cmd)
    proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (stdout, _) = proc.communicate()
    log.debug("stdout: " + str(stdout))
    returncode = proc.wait()
    log.debug("returncode: " + str(returncode))
    if returncode != 0 or (stdout is not None and "Error" in stdout):
        raise CriticalError("consul returncode: {0}, output: {1}".format(returncode, stdout))
    version = None
    for line in str(stdout).split("\n"):
        match = self.version_regex.match(line)
        if match:
            version = match.group(1)
    if not version:
        raise UnknownError(
            "Consul version not found in output. Consul output may have changed. {0}".format(support_msg())
        )
    if not isVersion(version):
        raise UnknownError("Consul version unrecognized '{0}'. {1}".format(version, support_msg()))
    self.ok()
    self.msg = "Consul version = {0}".format(version)
    if expected is not None and not re.search(expected, version):
        self.msg += " (expected '{0}')".format(expected)
        self.critical()
def run(self):
    start = time.time()
    self._read_value = self.read()
    stop = time.time()
    self._read_timing = stop - start
    log.info('read in %s secs', self._read_timing)
    log.info("value = '%s'", self._read_value)
def check(self, client):
    log.info('running API ping')
    if client.ping():
        self.msg = 'Docker API Ping successful'
    else:
        self.critical()
        self.msg = 'Docker API Ping Failed'
def run(self):
    expected = self.get_opt('expected')
    if expected is not None:
        validate_regex(expected)
        log.info('expected version regex: %s', expected)
    cmd = 'nodetool version'
    log.debug('cmd: ' + cmd)
    proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (stdout, _) = proc.communicate()
    log.debug('stdout: ' + str(stdout))
    returncode = proc.wait()
    log.debug('returncode: ' + str(returncode))
    if returncode != 0 or (stdout is not None and 'Error' in stdout):
        raise CriticalError('nodetool returncode: {0}, output: {1}'.format(returncode, stdout))
    version = None
    for line in str(stdout).split('\n'):
        match = self.version_regex.match(line)
        if match:
            version = match.group(1)
    if not version:
        raise UnknownError('Cassandra version not found in output. Nodetool output may have changed. {0}'
                           .format(support_msg()))
    if not isVersion(version):
        raise UnknownError("Cassandra version unrecognized '{0}'. {1}".format(version, support_msg()))
    self.ok()
    self.msg = 'Cassandra version = {0}'.format(version)
    if expected is not None and not re.search(expected, version):
        self.msg += " (expected '{0}')".format(expected)
        self.critical()
def run(self):
    if not self.args:
        self.usage('no git directory args given')
    self.origin = self.get_opt('origin')
    args = uniq_list_ordered(self.args)
    self.branch_prefix = self.get_opt('branch_prefix')
    if self.branch_prefix is not None:
        validate_regex(self.branch_prefix, 'branch prefix')
        self.branch_prefix = re.compile(self.branch_prefix)
    for arg in args:
        if not os.path.exists(arg):
            print("'%s' not found" % arg)
            sys.exit(ERRORS['WARNING'])
        if os.path.isfile(arg):
            log_option('file', arg)
        elif os.path.isdir(arg):
            log_option('directory', arg)
        else:
            die("path '%s' could not be determined as either a file or directory" % arg)
    for arg in args:
        self.check_git_branches_upstream(arg)
    if self.status == "OK":
        log.info('SUCCESS - All Git branches are tracking the expected upstream origin branches')
    else:
        log.critical('FAILED')
        sys.exit(ERRORS['CRITICAL'])
def run(self):
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    validate_host(host)
    validate_port(port)
    log.info('querying Tachyon Master')
    url = 'http://%(host)s:%(port)s/workers' % locals()
    log.debug('GET %s' % url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug("response: %s %s" % (req.status_code, req.reason))
    log.debug("content:\n{0}\n{1}\n{2}".format('=' * 80, req.content.strip(), '=' * 80))
    if req.status_code != 200:
        qquit('CRITICAL', "%s %s" % (req.status_code, req.reason))
    soup = BeautifulSoup(req.content, 'html.parser')
    dead_workers = 0
    try:
        dead_workers = len([_ for _ in soup.find(id='data2').find('tbody').find_all('tr') if _])
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to find and parse Tachyon Master info for dead workers')
    try:
        dead_workers = int(dead_workers)
    except (ValueError, TypeError):
        qquit('UNKNOWN', 'Tachyon Master dead workers parsing returned non-integer: {0}'.format(dead_workers))
    self.msg = 'Tachyon dead workers = {0}'.format(dead_workers)  # pylint: disable=attribute-defined-outside-init
    self.ok()
    # TODO: thresholds on number of dead workers (coming soon)
    if dead_workers:
        self.critical()
def parse_json(self, json_data):
    if not isList(json_data):
        raise UnknownError('non-list returned by Presto for nodes. {0}'.format(support_msg_api()))
    nodes_lagging = []
    max_lag = 0
    re_protocol = re.compile('^https?://')
    num_nodes = len(json_data)
    for node_item in json_data:
        last_response_time = node_item['lastResponseTime']
        last_response_datetime = datetime.strptime(last_response_time, '%Y-%m-%dT%H:%M:%S.%fZ')
        timedelta = datetime.utcnow() - last_response_datetime
        response_age = int(timedelta.total_seconds())
        if response_age > max_lag:
            max_lag = response_age
        if response_age > self.max_age:
            uri = node_item['uri']
            uri = re_protocol.sub('', uri)
            nodes_lagging += [uri]
            log.info("node '%s' last response age %d secs > max age %s secs",
                     node_item['uri'], response_age, self.max_age)
        else:
            log.info("node '%s' last response age %d secs", node_item['uri'], response_age)
    num_nodes_lagging = len(nodes_lagging)
    self.msg = 'Presto SQL - worker nodes with response timestamps older than {0:d} secs = {1:d}'\
               .format(self.max_age, num_nodes_lagging)
    self.check_thresholds(num_nodes_lagging)
    self.msg += ' out of {0:d} nodes'.format(num_nodes)
    if num_nodes < 1:
        self.warning()
        self.msg += ' (< 1 worker found)'
    self.msg += ', current max response age = {0:.2f} secs'.format(max_lag)
    if self.verbose and nodes_lagging:
        self.msg += ' [{0}]'.format(', '.join(nodes_lagging))
    self.msg += ' | num_nodes_lagging={0}{1} max_response_age={2:.2f}s'\
                .format(num_nodes_lagging, self.get_perf_thresholds(), max_lag)
def check_git_branches_upstream(self, target):
    target = os.path.abspath(target)
    gitroot = find_git_root(target)
    if gitroot is None:
        die('Failed to find git root for target {0}'.format(target))
    log.debug("finding branches for target '{0}'".format(target))
    repo = git.Repo(gitroot)
    branches = repo.branches
    if self.branch_prefix is not None:
        log.debug('restricting to branches matching branch prefix')
        branches = [x for x in branches if self.branch_prefix.match(str(x))]
        if not branches:
            log.error("No branches matching '%s' for target '%s'", self.get_opt('branch_prefix'), target)
            self.status = 'NO BRANCHES'
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug('\n\nbranches for target %s:\n\n%s\n', target, '\n'.join(list(branches)))
    for branch in branches:
        expected = '{0}/{1}'.format(self.origin, branch)
        tracking_branch = str(branch.tracking_branch())
        if tracking_branch == expected:
            log.info("OK: repo '{0}' branch '{1}' is tracking '{2}'"
                     .format(gitroot, branch, tracking_branch))
        else:
            self.status = "ERROR"
            log.error("BAD: branch '{0}' is tracking '{1}' (expected '{2}')"
                      .format(branch, tracking_branch, expected))
def process_args(self):
    args = uniq_list_ordered(self.args)
    if not args:
        self.usage('no directories specified as arguments')
    log_option('directories', args)
    self.compare_by_name = self.get_opt('name')
    self.compare_by_size = self.get_opt('size')
    self.compare_by_checksum = self.get_opt('checksum')
    self.regex = self.get_opt('regex')
    self.quiet = self.get_opt('quiet')
    self.no_short_circuit = self.get_opt('no_short_circuit')
    self.include_dot_dirs = self.get_opt('include_dot_dirs')
    if self.regex:
        if '(' not in self.regex:
            log.info('regex no capture brackets specified, will capture entire given regex')
            self.regex = '(' + self.regex + ')'
        validate_regex(self.regex)
        self.re_compiled = re.compile(self.regex, re.I)
    if not (self.compare_by_name or self.compare_by_size or self.compare_by_checksum or self.regex):
        self.compare_by_name = True
        #self.compare_by_size = True
        self.compare_by_checksum = True
    log_option('compare by name', self.compare_by_name)
    log_option('compare by size', self.compare_by_size)
    log_option('compare by checksum', self.compare_by_checksum)
    log_option('compare by regex', True if self.regex else False)
    return args
def check_version(self, filename, branch, branch_base, arg_var, found_version, branch_version):
    self.branches_dockerfile_checked.add(branch)
    self.dockerfiles_checked.add(filename)
    if arg_var:
        log.debug("found arg '%s'", arg_var)
        arg_version = "ARG '{0}={1}'".format(arg_var, found_version)
    else:
        arg_version = "'{0}'".format(found_version)
    #log.debug("arg '%s' matches branch base '%s'", argversion.group(1), branch_base)
    log.debug("comparing '%s' contents to version derived from branch '%s' => '%s'",
              filename, branch, branch_version)
    if not isVersion(branch_version.lstrip('jdk').lstrip('jre')):
        die("unrecognized branch version '{0}' for branch_base '{1}'"
            .format(branch_version, branch_base))
    #if branch_version == found_version or branch_version == found_version.split('.', 2)[0]:
    if found_version[0:len(branch_version)] == branch_version:
        log.info("{0} version '{1}' matches {2}"
                 .format(self.valid_git_branches_msg, branch_version, arg_version))
    else:
        log.error("{0} version '{1}' vs Dockerfile {2}"
                  .format(self.invalid_git_branches_msg, branch_version, arg_version))
        self.dockerfiles_failed += 1
        self.branches_failed.add(branch)
        return False
    return True
def is_file_dup_by_hash(self, filepath):
    checksum = None
    size = self.is_file_dup_by_size(filepath)
    if size:
        log.info("found file '%s' of matching size '%s' bytes", filepath, size)
        checksum = self.hash(filepath)
        self.sizes[size][filepath] = checksum
        self.hashes[checksum] = self.hashes.get(checksum, set())
        self.hashes[checksum].add(filepath)
    else:
        self.sizes[size] = {}
        self.sizes[size][filepath] = None
    sizeitem = self.sizes[size]
    if len(sizeitem) < 2:
        pass
    elif len(sizeitem) == 2:
        for filepath in sizeitem:
            if sizeitem[filepath] is None:
                log.info("backtracking to now hash first file '%s'", filepath)
                checksum = self.hash(filepath)
                sizeitem[filepath] = checksum
                self.hashes[checksum] = self.hashes.get(checksum, set())
                self.hashes[checksum].add(filepath)
    if checksum is not None and len(self.hashes[checksum]) > 1:
        self.dups_by_hash[checksum] = self.dups_by_hash.get(checksum, set())
        for filepath in self.hashes[checksum]:
            self.dups_by_hash[checksum].add(filepath)
        return True
    return False
def run(self):
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    validate_host(host)
    validate_port(port)
    log.info('querying Tachyon Master')
    url = 'http://%(host)s:%(port)s/home' % locals()
    log.debug('GET %s' % url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug("response: %s %s" % (req.status_code, req.reason))
    log.debug("content:\n{0}\n{1}\n{2}".format('=' * 80, req.content.strip(), '=' * 80))
    if req.status_code != 200:
        qquit('CRITICAL', "Non-200 response! %s %s" % (req.status_code, req.reason))
    soup = BeautifulSoup(req.content, 'html.parser')
    try:
        running_workers = soup.find('th', text=re.compile(r'Running\s+Workers:?', re.I))\
                              .find_next_sibling().get_text()
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to find and parse Tachyon Master info for running workers')
    try:
        running_workers = int(running_workers)
    except (ValueError, TypeError):
        qquit('UNKNOWN', 'Tachyon Master live workers parsing returned non-integer: {0}'.format(running_workers))
    self.msg = 'Tachyon running workers = {0}'.format(running_workers)  # pylint: disable=attribute-defined-outside-init
    self.ok()
    # TODO: thresholds on number of live workers (coming soon)
    if running_workers < 1:
        self.critical()
def run(self):
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    validate_host(host)
    validate_port(port)
    self.validate_thresholds(integer=False)
    url = 'http://%(host)s:%(port)s/master-status' % locals()
    log.debug('GET %s', url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        qquit('CRITICAL', "%s %s" % (req.status_code, req.reason))
    self.parse_output(req.content)
    log.info('server with min regions = %s regions on %s', self.server_min_regions[1], self.server_min_regions[0])
    log.info('server with max regions = %s regions on %s', self.server_max_regions[1], self.server_max_regions[0])
    imbalance = self.calculate_imbalance()
    self.msg = '{0}% region imbalance'.format(imbalance)
    self.check_thresholds(imbalance)
    self.msg += ' between HBase RegionServers hosting the most vs least number of regions'
    self.msg += ' (min = {0}, max = {1})'.format(self.server_min_regions[1], self.server_max_regions[1])
    self.msg += " | '% region imbalance'={0}%".format(imbalance)
    self.msg += self.get_perf_thresholds()
    self.msg += ' min_regions={0} max_regions={1}'.format(self.server_min_regions[1], self.server_max_regions[1])
def consume(self):
    self.check_connection()
    self.check_channel()

    def connection_timeout_handler():
        raise CriticalError("unique message not returned on queue '{queue}' within {secs:.2f} secs"\
                            .format(queue=self.queue, secs=self.timeout / 3) + \
                            ", consumer timed out while consuming messages from {name} broker '{host}:{port}'"\
                            .format(name=self.name, host=self.host, port=self.port))

    self.conn.add_timeout(self.timeout / 3, connection_timeout_handler)
    # don't re-declare, queue should still exist otherwise error out
    #channel.queue_declare(queue='hello')
    # don't ack as messages could stay in queue indefinitely
    self.consumer_tag = self.channel.basic_consume(self.consumer_callback,
                                                   queue=self.queue,
                                                   # let broker autogenerate consumer_tag
                                                   # consumer_tag=self.consumer_tag,
                                                   no_ack=self.no_ack
                                                  )
    # could also use non-callback mechanism - generator that yields tuples (method, properties, body)
    # requires self.channel.cancel() from within loop
    # self.channel.consume(self.queue,
    #                      no_ack=True,
    #                      exclusive=True,
    #                      arguments=None,
    #                      inactivity_timeout=self.timeout / 3)
    log.debug('start consuming')
    self.channel.start_consuming()
    # could instead use basic_get to return single message
    # self.channel.basic_get(queue=self.queue, no_ack=True)
    log.info('closing connection to broker')
    self.conn.close(reply_code=200, reply_text='Normal shutdown')
    return self.consumed_message
def run(self):
    self.no_args()
    directory = self.get_opt('directory')
    validate_directory(directory)
    directory = os.path.abspath(directory)
    self.remote = self.get_opt('remote')
    validate_chars(self.remote, 'remote', r'A-Za-z0-9_\.-')
    try:
        repo = git.Repo(directory)
    except InvalidGitRepositoryError as _:
        raise CriticalError("directory '{}' does not contain a valid Git repository!".format(directory))
    try:
        if not self.get_opt('no_fetch'):
            log.info('fetching from remote repo: {}'.format(self.remote))
            repo.git.fetch(self.remote)
        branch = repo.active_branch
        log.info('active branch: %s', branch)
        commits_behind = repo.iter_commits('{branch}..{remote}/{branch}'.format(remote=self.remote, branch=branch))
        commits_ahead = repo.iter_commits('{remote}/{branch}..{branch}'.format(remote=self.remote, branch=branch))
        num_commits_behind = sum(1 for c in commits_behind)
        num_commits_ahead = sum(1 for c in commits_ahead)
    # happens with detached HEAD checkout like Travis CI does
    except TypeError as _:
        raise CriticalError(_)
    except GitCommandError as _:
        raise CriticalError(', '.join(str(_.stderr).split('\n')))
    self.msg = "git checkout branch '{}' is ".format(branch)
    if num_commits_ahead + num_commits_behind == 0:
        self.ok()
        self.msg += 'up to date with'
    else:
        self.critical()
        self.msg += '{} commits behind, {} commits ahead of'.format(num_commits_behind, num_commits_ahead)
    self.msg += " remote '{}'".format(self.remote)
    self.msg += ' | commits_behind={};0;0 commits_ahead={};0;0'.format(num_commits_behind, num_commits_ahead)
def check_http(self, host, port, url_path=''):
    if not isStr(url_path):
        url_path = ''
    url = '{protocol}://{host}:{port}/{url_path}'.format(protocol=self.protocol,
                                                         host=host,
                                                         port=port,
                                                         url_path=url_path.lstrip('/'))
    log.info('GET %s', url)
    try:
        # timeout here isn't total timeout, it's response time
        req = requests.get(url, timeout=self.request_timeout)
    except requests.exceptions.RequestException:
        return False
    except IOError:
        return False
    log.debug("%s - response: %s %s", url, req.status_code, req.reason)
    log.debug("%s - content:\n%s\n%s\n%s", url, '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        return None
    if self.regex:
        log.info('%s - checking regex against content', url)
        if self.regex.search(req.content):
            log.info('%s - regex matched http output', url)
        else:
            log.info('%s - regex did not match http output', url)
            return None
    log.info("%s - passed all checks", url)
    return (host, port)
def check_media_file(self, filename):
    valid_media_msg = '%s => OK' % filename
    invalid_media_msg = '%s => INVALID' % filename
    try:
        # cmd = self.validate_cmd.format(filename)
        cmd = self.validate_cmd
        log.debug('cmd: %s %s', cmd, filename)
        log.info('verifying {0}'.format(filename))
        # capturing stderr to stdout because ffprobe prints to stderr in all cases
        # Python 2.7+
        #subprocess.check_output(cmd.split() + [filename], stderr=subprocess.STDOUT)
        proc = subprocess.Popen(cmd.split() + [filename], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        (stdout, _) = proc.communicate()
        returncode = proc.wait()
        if returncode != 0 or (stdout is not None and 'Error' in stdout):
            _ = CalledProcessError(returncode, cmd)
            _.output = stdout
            raise _
        print(valid_media_msg)
    except CalledProcessError as _:
        if self.verbose > 2:
            print(_.output)
        if self.skip_errors:
            print(invalid_media_msg)
            self.failed = True
            return False
        die(invalid_media_msg)
def run(self):
    version = self.get_version()
    log.info("got version '%s'", version)
    self.check_version(version)
    extra_info = self.extra_info()
    if extra_info:
        self.msg += extra_info
def parse_json(self, json_data):
    log.info('parsing response')
    try:
        data = json_data['beans'][0]
        name_dir_statuses = data['NameDirStatuses']
        name_dir_data = json.loads(name_dir_statuses)
        active_dirs = name_dir_data['active']
        failed_dirs = name_dir_data['failed']
        num_active_dirs = len(active_dirs)
        num_failed_dirs = len(failed_dirs)
        self.msg = 'NameNode has {0} failed dir{1}'.format(num_failed_dirs, plural(num_failed_dirs))
        if num_failed_dirs > 0:
            self.warning()
            if self.verbose:
                self.msg += ' ({0})'.format(', '.join(failed_dirs))
        self.msg += ', {0} active dir{1}'.format(num_active_dirs, plural(num_active_dirs))
        if num_active_dirs < 1:
            self.critical()
        if self.verbose and num_active_dirs > 0:
            self.msg += ' ({0})'.format(', '.join(active_dirs))
        self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(num_failed_dirs, num_active_dirs)
    except KeyError as _:
        raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                           .format(self.host, self.port, _, support_msg_api()))
    except ValueError as _:
        raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                           .format(self.host, self.port, _))
def connect(self):
    log.info('connecting to HBase Thrift Server at %s:%s', self.host, self.port)
    try:
        # cast port to int to avoid low level socket module TypeError for ports > 32000
        self.conn = happybase.Connection(host=self.host, port=int(self.port), timeout=10 * 1000)  # ms
    except (socket.error, socket.timeout, ThriftException, HBaseIOError) as _:
        qquit('CRITICAL', 'error connecting: {0}'.format(_))
def run(self):
    ###############
    # == Write == #
    start = time.time()
    self.write()
    end = time.time()
    self._write_timing = end - start
    log.info('wrote in %s secs', self._write_timing)

    ##############
    # == Read == #
    # Python 2.x
    super(KeyWriteNagiosPlugin, self).run()
    # Python 3.x
    # super().run()
    if self._read_value != self._write_value:
        raise CriticalError("read back value '%s' does not match written value '%s'!"
                            % (self._read_value, self._write_value))

    ################
    # == Delete == #
    start = time.time()
    self.delete()
    end = time.time()
    self._delete_timing = end - start
    log.info('deleted in %s secs', self._delete_timing)
def run(self):
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    user = self.get_opt('user')
    password = self.get_opt('password')
    if self.get_opt('ssl'):
        self.protocol = 'https'
    history_mins = self.get_opt('history_mins')
    num = self.get_opt('num')
    #inventory_id = self.get_opt('id')
    source = self.get_opt('source')
    dest = self.get_opt('dest')
    max_age = self.get_opt('max_age')
    max_runtime = self.get_opt('max_runtime')
    validate_host(host)
    validate_port(port)
    validate_user(user)
    validate_password(password)
    validate_float(history_mins, 'history mins')
    self.history_mins = float(history_mins)
    filter_opts = {}
    if self.history_mins:
        now = datetime.now()
        filter_opts['dateRangeStart'] = datetime.strftime(now - timedelta(minutes=self.history_mins), '%F %H:%M:%S')
        filter_opts['dateRangeEnd'] = datetime.strftime(now, '%F %H:%M:%S')
    if num is not None:
        validate_int(num, 'num ingestions', 1)
    #if inventory_id is not None:
    #    validate_chars(inventory_id, 'ingestion id', r'\w-')
    #    filter_opts['inventoryId'] = inventory_id
    if source is not None:
        log_option('source', source)
        filter_opts['fileName'] = source
    if dest is not None:
        log_option('dest', dest)
        filter_opts['destinationPath'] = dest
    if max_age is not None:
        validate_float(max_age, 'max age', 1)
        max_age = float(max_age)
    if max_runtime is not None:
        validate_float(max_runtime, 'max incomplete runtime', 1)
        max_runtime = float(max_runtime)
    self.url_base = '{protocol}://{host}:{port}/bedrock-app/services/rest'.format(host=host,
                                                                                  port=port,
                                                                                  protocol=self.protocol)
    # auth first, get JSESSIONID cookie
    # cookie jar doesn't work in Python or curl, must extract JSESSIONID to header manually
    #self.jar = cookielib.CookieJar()
    log.info('authenticating to Zaloni Bedrock')
    (_, self.auth_time) = self.req(url='{url_base}/admin/getUserRole'.format(url_base=self.url_base),
                                   # using json instead of constructing string manually,
                                   # this correctly escapes backslashes in password
                                   body=json.dumps({"username": user, "password": password}))
    if self.get_opt('list'):
        self.list_ingestions(num=num)
    self.check_ingestion(num=num, filter_opts=filter_opts, max_age=max_age, max_runtime=max_runtime)
def run(self):
    log.info("querying %s", self.software)
    url = "{protocol}://{host}:{port}/PolicyManagement/{api_version}/deployments".format(
        host=self.host, port=self.port, api_version=self.api_version, protocol=self.protocol)
    log.debug("GET %s", url)
    try:
        req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
    except requests.exceptions.RequestException as _:
        errhint = ""
        if "BadStatusLine" in str(_.message):
            errhint = " (possibly connecting to an SSL secured port without using --ssl?)"
        elif self.protocol == "https" and "unknown protocol" in str(_.message):
            errhint = " (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)"
        qquit("CRITICAL", str(_) + errhint)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", "=" * 80, req.content.strip(), "=" * 80)
    if req.status_code == 400 and req.reason == "Bad Request":
        qquit("CRITICAL",
              "{0}: {1} (possibly new install with no deployments yet?)".format(req.status_code, req.reason))
    if req.status_code != 200:
        qquit("CRITICAL", "{0}: {1}".format(req.status_code, req.reason))
    try:
        json_list = json.loads(req.content)
        if log.isEnabledFor(logging.DEBUG):
            print(jsonpp(json_list))
            print("=" * 80)
        if not isList(json_list):
            raise ValueError("returned content is not a list")
        if not json_list:
            qquit("UNKNOWN", "no deployments found")
        last_deployment = json_list[0]
        userid = last_deployment["UserId"]
        description = last_deployment["Description"]
        hostname = last_deployment["HostName"]
        timestamp = last_deployment["timestamp"]
        last_deploy_datetime = datetime.strptime(timestamp, "%b %d, %Y %H:%M:%S %p")
    except (KeyError, ValueError) as _:
        qquit("UNKNOWN",
              "error parsing output from {software}: {exception}: {error}. {support_msg}".format(
                  software=self.software, exception=type(_).__name__, error=_, support_msg=support_msg_api()))
    timedelta = datetime.now() - last_deploy_datetime
    mins = int(int(timedelta.total_seconds()) / 60)
    self.msg = "{software} last deployment was at '{timestamp}', {mins} mins ago".format(
        software=self.software, timestamp=timestamp, mins=mins)
    self.check_thresholds(mins)
    if self.verbose:
        self.msg += " by user '{userid}', host = '{hostname}', description = '{description}'".format(
            userid=userid, hostname=hostname, description=description)
    self.msg += " | mins_since_last_deployment={mins}{thresholds}".format(
        mins=mins, thresholds=self.get_perf_thresholds(boundary="lower"))
def run(self):
    job_id = self.get_opt('job_id')
    travis_token = self.get_opt('travis_token')
    if job_id is None:
        job_id = os.getenv('JOB_ID')
    if travis_token is None:
        travis_token = os.getenv('TRAVIS_TOKEN')
    #if travis_token is None:
    #    self.usage('--travis-token option or ' +
    #               '$TRAVIS_TOKEN environment variable required to authenticate to the API')
    validate_chars(job_id, 'job id', '0-9')
    validate_alnum(travis_token, 'travis token')
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
        'Travis-API-Version': '3',
        'Authorization': 'token {0}'.format(travis_token)
    }
    log.info('triggering debug job {job_id}'.format(job_id=job_id))
    url = 'https://api.travis-ci.org/job/{job_id}/debug'.format(job_id=job_id)
    log.debug('POST %s' % url)
    try:
        req = requests.post(url, headers=headers)
    except requests.exceptions.RequestException as _:
        raise CriticalError(_)
    log.debug("response: %s %s", req.status_code, req.reason)
    log.debug("content:\n%s\n%s\n%s", '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code == 409:
        error_message = ''
        try:
            _ = json.loads(req.content)
            error_message = _['error_message']
        except ValueError:
            pass
        error_message += (" (if you've just retriggered this you can avoid this error " +
                          "using the --ignore-running switch)")
        if self.get_opt('ignore_running'):
            log.info('job already running (ignoring)')
        else:
            log.info('job already running')
            raise CriticalError('{0} {1}: {2}'.format(req.status_code, req.reason, error_message))
    elif req.status_code != 202:
        raise CriticalError("%s %s" % (req.status_code, req.reason))
    # don't need to query this if using the API address rather than the web UI address
    # as we don't need to figure out the repo name, just use the job id by itself
    # url = 'https://api.travis-ci.org/job/{job_id}'.format(job_id=job_id)
    # log.debug('GET %s' % url)
    # try:
    #     req = requests.get(url, headers=headers)
    # except requests.exceptions.RequestException as _:
    #     raise CriticalError(_)
    # log.debug("response: %s %s", req.status_code, req.reason)
    # log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
    # if req.status_code != 200:
    #     raise CriticalError("%s %s" % (req.status_code, req.reason))
    #
    # repo = None
    # try:
    #     repo = json.loads(req.content)['repository']['slug']
    # except ValueError as _:
    #     raise
    ssh_address = self.get_ssh_address(job_id=job_id)
    log.info('Executing: ssh -- {0}'.format(ssh_address))
    sys.stdout.flush()
    sys.stderr.flush()
    self.disable_timeout()
    os.execvp('ssh', ['--', ssh_address])
def run(self):
    csv_file = self.get_opt('csv')
    avro_dir = self.get_opt('avro_dir')
    has_header = self.get_opt('has_header')
    # I don't know why the Spark guys made this a string instead of a bool
    header_str = 'false'
    if has_header:
        header_str = 'true'
    schema = self.get_opt('schema')
    # let Spark fail if csv/avro dir aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("CSV Source: %s" % csv_file)
    log.info("Avro Destination: %s" % avro_dir)

    if schema:
        def get_type(arg):
            arg = str(arg).lower()
            if arg not in self.types_mapping:
                self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                           % (arg, ', '.join(sorted(self.types_mapping.keys()))))
            # return self.types_mapping[arg]
            module = __import__('pyspark.sql.types', globals(), locals(), ['types'], -1)
            class_ = getattr(module, self.types_mapping[arg])
            _ = class_()
            return _

        def create_struct(arg):
            name = str(arg).strip()
            data_type = 'string'
            if ':' in arg:
                (name, data_type) = arg.split(':', 1)
            data_class = get_type(data_type)
            return StructField(name, data_class, True)

        # see https://github.com/databricks/spark-csv#python-api
        self.schema = StructType([create_struct(_) for _ in schema.split(',')])
        log.info('generated CSV => Spark schema')

    conf = SparkConf().setAppName('HS PySpark CSV => Avro')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    if self.verbose < 3 and 'setLogLevel' in dir(sc):
        sc.setLogLevel('WARN')
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)

    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))

    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.read.format('com.databricks.spark.csv')\
                           .options(header=header_str, inferschema='true')\
                           .load(csv_file)
        else:
            log.info('using explicitly defined schema')
            schema = self.schema
            df = sqlContext.read\
                           .format('com.databricks.spark.csv')\
                           .options(header=header_str)\
                           .load(csv_file, schema=schema)
    else:
        die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' +
            'I may change this on request but prefer people just upgrade')
        # log.warn('running legacy code for Spark <= 1.3')
        # if has_header and not schema:
        #     log.info('inferring schema from CSV headers')
        #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
        #                          header=header_str, inferSchema='true')
        # elif self.schema:
        #     log.info('using explicitly defined schema')
        #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
        #                          header=header_str, schema=self.schema)
        # else:
        #     die('no header and no schema, caught late')
    # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods
    # for writing avro using the databricks avro driver
    df.write.format('com.databricks.spark.avro').save(avro_dir)
def parse_results(self, content):
    build = self.get_latest_build(content)
    number = build['number']
    log.info('build number = %s', number)
    if not isInt(number):
        raise UnknownError('build number returned is not an integer!')
    message = build['message']
    log.info('message = %s', message)
    branch = build['branch']
    log.info('branch = %s', branch)
    commit = build['commit']
    log.info('commit = %s', commit)
    started_at = build['started_at']
    log.info('started_at = %s', started_at)
    finished_at = build['finished_at']
    log.info('finished_at = %s', finished_at)
    duration = build['duration']
    log.info('duration = %s', duration)
    if not isInt(duration):
        raise UnknownError('duration returned is not an integer!')
    repository_id = build['repository_id']
    log.info('repository_id = %s', repository_id)
    if not isInt(repository_id):
        raise UnknownError('repository_id returned is not an integer!')
    result = build['result']
    log.info('result = %s', result)
    state = build['state']
    log.info('state = %s', state)
    if result == 0:
        self.ok()
        status = "PASSED"
    else:
        self.critical()
        status = "FAILED"
    self.msg = "Travis CI build #{number} {status} for repo '{repo}' in {duration} secs".format(
        number=number, status=status, repo=self.repo, duration=duration)
    self.check_thresholds(duration)
    self.msg += ", started_at='{0}'".format(started_at)
    self.msg += ", finished_at='{0}'".format(finished_at)
    if self.verbose:
        self.msg += ", message='{0}'".format(message)
        self.msg += ", branch='{0}'".format(branch)
        self.msg += ", commit='{0}'".format(commit)
        self.msg += ", repository_id='{0}'".format(repository_id)
    if self.verbose or self.builds_in_progress > 0:
        self.msg += ", {0} build{1} in progress".format(self.builds_in_progress, plural(self.builds_in_progress))
    self.msg += " | last_build_duration={duration}s{perf_thresholds} num_builds_in_progress={builds_in_progress}"\
                .format(duration=duration,
                        perf_thresholds=self.get_perf_thresholds(),
                        builds_in_progress=self.builds_in_progress)
def process_options(self):
    self.expected = self.get_opt('expected')
    if self.expected is not None:
        validate_regex(self.expected)
        log.info('expected version regex: %s', self.expected)
def print_log(self, build=None, job_id=None):
    if job_id:
        self.print_job_log(job_id=job_id)
        log.info('=' * 80)
        log.info('end of log for job id %s', job_id)
        log.info('=' * 80 + '\n')
    else:
        if not build:
            code_error('no job id passed to print_log(), nor build to determine job from')
        log.info('getting job id for build #%s', build['number'])
        if 'jobs' not in build:
            raise UnknownError('no jobs field found in build, {0}'.format(support_msg_api()))
        job = None
        for _ in build['jobs']:
            _id = _['id']
            url = 'https://api.travis-ci.org/jobs/{id}'.format(id=_id)
            req = self.request_handler.get(url)
            # if this raises ValueError it'll be caught by run handler
            job_data = json.loads(req.content)
            if log.isEnabledFor(logging.DEBUG):
                log.debug("job id %s status:\n%s", _id, jsonpp(job_data))
            if self.failed is True:
                if job_data['state'] == 'finished' and job_data['status'] in (None, 1, '1'):
                    job = job_data
            else:
                job = job_data
        if not job:
            raise UnknownError('no job found in build {0}'.format(build['number']))
        self.print_job_log(job=job)
        log.info('=' * 80)
        log.info('end of log for build number #%s job id %s', build['number'], job['id'])
        log.info('=' * 80 + '\n')
def process_result(self, result):
    _id = result['id']
    log.info('latest build id: %s', _id)
    status = result['status']
    log.info('status: %s', status)
    if not isInt(status, allow_negative=True):
        raise UnknownError('non-integer status returned by DockerHub API. {0}'.format(support_msg_api()))
    tag = result['dockertag_name']
    log.info('tag: %s', tag)
    trigger = result['cause']
    log.info('trigger: %s', trigger)
    created_date = result['created_date']
    log.info('created date: %s', created_date)
    last_updated = result['last_updated']
    log.info('last updated: %s', last_updated)
    created_datetime = datetime.datetime.strptime(created_date.split('.')[0], '%Y-%m-%dT%H:%M:%S')
    updated_datetime = datetime.datetime.strptime(last_updated.split('.')[0], '%Y-%m-%dT%H:%M:%S')
    build_latency_timedelta = updated_datetime - created_datetime
    build_latency = build_latency_timedelta.total_seconds()
    log.info('build latency (creation to last updated): %s', build_latency)
    # results in .0 floats anyway
    build_latency = int(build_latency)
    build_code = result['build_code']
    build_url = 'https://hub.docker.com/r/{0}/builds/{1}'.format(self.repo, build_code)
    log.info('latest build URL: %s', build_url)
    if str(status) in self.statuses:
        status = self.statuses[str(status)]
    else:
        log.warning("status code '%s' not recognized! %s", status, support_msg_api())
        log.warning('defaulting to assume status is an Error')
        status = 'Error'
    if status != 'Success':
        self.critical()
    self.msg += "'{repo}' last completed build status: '{status}', tag: '{tag}', build code: {build_code}"\
                .format(repo=self.repo, status=status, tag=tag, build_code=build_code)
    if self.verbose:
        self.msg += ', id: {0}'.format(_id)
        self.msg += ', trigger: {0}'.format(trigger)
        self.msg += ', created date: {0}'.format(created_date)
        self.msg += ', last updated: {0}'.format(last_updated)
        self.msg += ', build_latency: {0}'.format(sec2human(build_latency))
        self.msg += ', build URL: {0}'.format(build_url)
    self.msg += ' | build_latency={0:d}s'.format(build_latency)
def check_http(self, host, port, url_path=''):
    if not isStr(url_path):
        url_path = ''
    url = '{protocol}://{host}:{port}/{url_path}'.format(protocol=self.protocol,
                                                         host=host,
                                                         port=port,
                                                         url_path=url_path.lstrip('/'))
    log.info('GET %s', url)
    try:
        # timeout here isn't total timeout, it's response time
        req = requests.get(url, timeout=self.request_timeout)
    except requests.exceptions.RequestException as _:
        log.info('%s - returned exception: %s', url, _)
        return False
    except IOError as _:
        log.info('%s - returned IOError: %s', url, _)
        return False
    log.debug("%s - response: %s %s", url, req.status_code, req.reason)
    log.debug("%s - content:\n%s\n%s\n%s", url, '=' * 80, req.content.strip(), '=' * 80)
    if req.status_code != 200:
        log.info('%s - status code %s != 200', url, req.status_code)
        return None
    if self.regex:
        log.info('%s - checking regex against content', url)
        # if this ends up not being processed properly and remains a string instead
        # of the expected compiled regex, then .search() will hang
        if isStr(self.regex):
            die('string found instead of expected compiled regex!')
        if self.regex.search(req.content):
            log.info('%s - regex matched http output', url)
        else:
            log.info('%s - regex did not match http output', url)
            return None
    log.info("%s - passed all checks", url)
    return (host, port)
def run(self):
    start = time.time()
    log.info('subscribing')
    self.subscribe()
    log.info('publishing message "%s"', self.publish_message)
    start_publish = time.time()
    self.publish()
    stop_publish = time.time()
    self._publish_time = round(stop_publish - start_publish, self._precision)
    log.info('published in %s secs', self._publish_time)
    if self.sleep_secs:
        log.info('sleeping for %s secs', self.sleep_secs)
        time.sleep(self.sleep_secs)
    start_consume = time.time()
    log.info('consuming message')
    self._consumed_message = self.consume()
    stop_consume = time.time()
    self._consume_time = round(stop_consume - start_consume, self._precision)
    log.info('consumed in %s secs', self._consume_time)
    log.info('consumed message = "%s"', self._consumed_message)
    # resetting to ok is bad - would break inheritance logic
    #self.ok()
    stop = time.time()
    self._total_time = round(stop - start, self._precision)
def get_table_conn(self):
    log.info("checking table '%s'", self.table)
    if not self.conn.is_table_enabled(self.table):
        qquit('CRITICAL', "table '{0}' is not enabled!".format(self.table))
    table_conn = self.conn.table(self.table)
    return table_conn
def run(self):
    csv_file = self.get_opt('csv')
    parquet_dir = self.get_opt('parquet_dir')
    has_header = self.get_opt('has_header')
    # I don't know why the Spark guys made this a string instead of a bool
    header_str = 'false'
    if has_header:
        header_str = 'true'
    schema = self.get_opt('schema')
    # let Spark fail if csv/parquet aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("CSV Source: %s" % csv_file)
    log.info("Parquet Destination: %s" % parquet_dir)

    if schema:
        def get_type(arg):
            arg = str(arg).lower()
            if arg not in self.types_mapping:
                self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                           % (arg, ', '.join(sorted(self.types_mapping.keys()))))
            # return self.types_mapping[arg]
            module = __import__('pyspark.sql.types', globals(), locals(), ['types'], -1)
            class_ = getattr(module, self.types_mapping[arg])
            _ = class_()
            return _

        def create_struct(arg):
            name = arg
            data_type = 'string'
            if ':' in arg:
                (name, data_type) = arg.split(':', 1)
            data_class = get_type(data_type)
            return StructField(name, data_class, True)

        # see https://github.com/databricks/spark-csv#python-api
        self.schema = StructType([create_struct(_) for _ in schema.split(',')])
        log.info('generated CSV => Spark schema')

    conf = SparkConf().setAppName('HS PySpark CSV => Parquet')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)

    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))

    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.read.format('com.databricks.spark.csv')\
                           .options(header=header_str, inferschema='true')\
                           .load(csv_file)
        else:
            log.info('using explicitly defined schema')
            df = sqlContext.read\
                           .format('com.databricks.spark.csv')\
                           .options(header=header_str)\
                           .load(csv_file, schema=self.schema)
        df.write.parquet(parquet_dir)
    else:
        log.warn('running legacy code for Spark <= 1.3')
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
                                 header=header_str, inferSchema='true')
        elif self.schema:
            log.info('using explicitly defined schema')
            schema = self.schema
            df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
                                 header=header_str, schema=schema)
        else:
            die('no header and no schema, caught late')
        df.saveAsParquetFile(parquet_dir)
def launch_job(self):
    log.info('triggering debug job {job_id}'.format(job_id=self.job_id))
    url = 'https://api.travis-ci.org/job/{job_id}/debug'.format(job_id=self.job_id)
    self.request_handler.check_response_code = self.check_job_launch_response_code
    self.request_handler.post(url, headers=self.headers)
def get_ingestions(self, num=None, filter_opts=None):
    log.info('getting ingestion history')
    if num:
        chunk_size = num
        log.info('explicit number of results requested: %s', chunk_size)
    elif filter_opts:
        chunk_size = 10
        log.info('filters detected, defaulting number of results to %s', chunk_size)
    else:
        chunk_size = 100
        log.info('using catch all default result limit of %s', chunk_size)
    settings = {'chunkSize': chunk_size, 'currentPage': 1}
    if filter_opts is not None:
        if not isDict(filter_opts):
            code_error('passed non-dictionary for filter opts to get_ingestions')
        for key, value in sorted(filter_opts.items()):
            log.info("filter: '%s' = '%s'", key, value)
        settings = merge_dicts(settings, filter_opts)
    log.info('settings: %s', settings)
    log.info('querying Zaloni for ingestion history')
    (req, self.query_time) = self.req(url='{url_base}/ingestion/publish/getFileIndex'.format(url_base=self.url_base),
                                      # orders by newest first, but seems to return last 10 anyway
                                      body=json.dumps(settings))
    try:
        log.info('parsing JSON response')
        json_dict = json.loads(req.content)
    except ValueError as _:
        qquit('UNKNOWN', 'error parsing json returned by Zaloni: {0}'.format(_))
    return json_dict
def run(self):
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    user = self.get_opt('user')
    password = self.get_opt('password')
    self._all = self.get_opt('all')
    workflow_id = self.get_opt('id')
    workflow_name = self.get_opt('name')
    max_age = self.get_opt('max_age')
    max_runtime = self.get_opt('max_runtime')
    if self.get_opt('ssl'):
        self.protocol = 'https'
    validate_host(host)
    validate_port(port)
    validate_user(user)
    validate_password(password)
    if workflow_id is not None:
        if workflow_name is not None:
            self.usage('cannot specify both --id and --name simultaneously')
        validate_int(workflow_id, 'workflow id', 1)
        workflow_id = int(workflow_id)
    elif workflow_name is not None:
        validate_chars(workflow_name, 'workflow name', r'\w\s-')
    elif self.get_opt('list'):
        pass
    else:
        self.usage('must specify one of --name / --id / --all or use --list to find workflow names/IDs to specify')
    if max_age is not None:
        validate_float(max_age, 'max age', 1)
        max_age = float(max_age)
    if max_runtime is not None:
        validate_float(max_runtime, 'max runtime', 1)
        max_runtime = float(max_runtime)
    self.url_base = '{protocol}://{host}:{port}/bedrock-app/services/rest'.format(host=host,
                                                                                  port=port,
                                                                                  protocol=self.protocol)
    # auth first, get JSESSIONID cookie
    # cookie jar doesn't work in Python or curl, must extract JSESSIONID to header manually
    #self.jar = cookielib.CookieJar()
    log.info('authenticating to Zaloni Bedrock')
    (_, self.auth_time) = self.req(url='{url_base}/admin/getUserRole'.format(url_base=self.url_base),
                                   # using json instead of constructing string manually,
                                   # this correctly escapes backslashes in password
                                   body=json.dumps({"username": user, "password": password}))
    # alternative method
    #session = requests.Session()
    #req = self.req(session,
    #               url='http://%(host)s:%(port)s/bedrock-app/services/rest/%(user)s/getUserRole' % locals(),
    #               method='POST')
    if self.get_opt('list'):
        self.list_workflows()
    if self._all:
        workflows = self.get_workflows()
        if not workflows or len(workflows) == 0:
            qquit('UNKNOWN', 'no workflows found')
        results = {}
        try:
            for workflow in workflows:
                result = self.check_workflow(workflow['wfName'], None)
                if result is None:
                    results['No Runs'] = results.get('None', 0)
                    results['No Runs'] += 1
                    continue
                results[result] = results.get(result, 0)
                results[result] += 1
            self.msg = 'Zaloni workflows: '
            for result in results:
                self.msg += "'{0}' = {1}, ".format(result, results[result])
            self.msg = self.msg.rstrip(', ')
        except KeyError as _:
            qquit('UNKNOWN', 'parsing workflows for --all failed: {0}. '.format(_) + support_msg_api())
    else:
        self.check_workflow(workflow_name, workflow_id, max_age, max_runtime)
def check_channel(self):
    log.info('checking channel is still open')
    if not self.channel.is_open:
        raise CriticalError('channel closed')
def disable_timeout(self):
    log.info('disabling timeout')
    self.timeout = 0
    signal.alarm(0)
def check_connection(self):
    log.info('checking connection is still open')
    if not self.conn.is_open:
        raise CriticalError('connection closed')
def subscribe(self):
    credentials = pika.credentials.PlainCredentials(self.user, self.password)
    parameters = pika.ConnectionParameters(host=self.host,
                                           port=self.port,
                                           virtual_host=self.vhost,
                                           credentials=credentials,
                                           heartbeat_interval=1,
                                           ssl=self.ssl,
                                           connection_attempts=self.default_conn_attempts,
                                           retry_delay=self.retry_delay,
                                           backpressure_detection=True,
                                           # socket_timeout – Use for high latency networks
                                          )
    self.conn = pika.BlockingConnection(parameters=parameters)
    log.debug('adding blocked connection callback')
    self.conn.add_on_connection_blocked_callback(self.connection_blocked_callback)
    log.debug('adding connection timeout to one 3rd of total timeout (%.2f out of %.2f secs)',
              self.timeout / 3, self.timeout)
    # no args to this callback
    self.conn.add_timeout(self.timeout / 3, self.connection_timeout_handler)
    # self.check_connection()
    log.info('requesting channel')
    self.channel = self.conn.channel()
    log.info('got channel number %s', self.channel.channel_number)
    log.debug('adding channel cancel callback')
    self.channel.add_on_cancel_callback(self.connection_cancel_callback)
    # newer versions of RabbitMQ won't use this but will instead use TCP backpressure
    # not available on BlockingChannel
    #self.channel.add_on_flow_callback(self.on_flow_callback)
    log.debug('adding return callback')
    # not available on BlockingChannel
    #self.channel.add_on_return_callback(self.connection_return_callback)
    if self.use_transactions:
        log.info('setting channel to use AMQP transactions')
        self.channel.tx_select()
    else:
        log.info('setting RabbitMQ specific channel confirmation')
        # different in BlockingChannel
        #self.channel.confirm_delivery(callback=self.confirm_delivery_callback, nowait=False)
        self.channel.confirm_delivery()
    self.check_channel()
    log.info("declaring queue '%s'", self.queue)
    if self.queue:
        result = self.channel.queue_declare(queue=self.queue, durable=self.durable)
        if self.queue != result.method.queue:
            raise UnknownError("queue returned in subscribe ('{queue_returned}') "\
                               .format(queue_returned=result.method.queue) + \
                               "did not match requested queue name ('{queue}')"\
                               .format(queue=self.queue))
    else:
        # auto-generate uniq queue, durable flag is ignored for exclusive
        result = self.channel.queue_declare(exclusive=True)
        self.queue = result.method.queue
        self.routing_key = self.queue
        log.info('was assigned unique exclusive queue: %s', self.queue)
    if self.exchange:
        log.info("declaring exchange: '%s', type: '%s'", self.exchange, self.exchange_type)
        self.channel.exchange_declare(exchange=self.exchange, type=self.exchange_type)
        # if using nameless exchange this isn't necessary as routing key will send to queue
        log.info("binding queue '%s' to exchange '%s'", self.queue, self.exchange)
        self.channel.queue_bind(exchange=self.exchange, queue=self.queue)