def main(self):
    try:
        # Python 2.x
        super(NagiosPlugin, self).main()
        # Python 3.x
        # super().main()
        # redirect_stderr_stdout()
    except CriticalError as _:
        qquit('CRITICAL', _)
    except WarningError as _:
        qquit('WARNING', _)
    except UnknownError as _:
        qquit('UNKNOWN', _)
    except CodingError as _:
        qquit('UNKNOWN', 'Programming Error: {0}. {1}'.format(_, support_msg()))
    except Exception as _:  # pylint: disable=broad-except
        exception_type = type(_).__name__
        if log.isEnabledFor(logging.DEBUG):
            log.debug("exception: '%s'", exception_type)
            log.debug(traceback.format_exc())
        msg = 'Nagios Plugin Exception: {exception_type}: {msg}'.format(exception_type=exception_type,
                                                                        msg=self.exception_msg())
        #msg = ', '.join([x.strip() for x in msg.split('\n')])
        # ', ' doesn't look nice for ':\n ...' => ':, ...' (snakebite OutOfNNException)
        #msg = '\t'.join([x.strip() for x in msg.split('\n')])
        #if self.options.verbose > 2:
        #    msg = type(_).__name__ + ': ' + msg
        msg += '. ' + support_msg()
        qquit('UNKNOWN', msg)

def parse_table(table):
    """
    Take a BeautifulSoup table as argument and parse it for compaction information.
    Return True if compacting or False otherwise
    """
    log.debug('checking first following table')
    if log.isEnabledFor(logging.DEBUG):
        log.debug('table:\n%s\n%s', table.prettify(), '='*80)
    rows = table.findChildren('tr')
    if len(rows) < 3:
        raise UnknownError('parse error - less than the 3 expected rows in table attributes')
    col_names = rows[0].findChildren('th')
    if len(col_names) < 3:
        raise UnknownError('parse error - less than the 3 expected column headings')
    first_col = col_names[0].get_text().strip()
    if first_col != 'Attribute Name':
        raise UnknownError(
            'parse error - expected first column header to be \'{0}\' but got \'{1}\' instead. '\
            .format('Attribute Name', first_col)
            + support_msg())
    # ===========
    # fix for older versions of HBase < 1.0 that do not populate the table properly
    # if table does not exist
    found_compaction = False
    for row in rows[1:]:
        cols = row.findChildren('td')
        if cols[0].get_text().strip() == 'Compaction':
            found_compaction = True
    if not found_compaction:
        raise CriticalError('Compaction table attribute not found, perhaps table does not exist?')
    # ===========
    for row in rows[1:]:
        cols = row.findChildren('td')
        if len(cols) < 3:
            raise UnknownError('parse error - less than the 3 expected columns in table attributes: ' + \
                               '{0}. {1}'.format(cols, support_msg()))
        if cols[0].get_text().strip() == 'Compaction':
            compaction_state = cols[1].get_text().strip()
            # NONE when enabled, Unknown when disabled
            log.info('compaction state = %s', compaction_state)
            for _ in ('NONE', 'Unknown'):
                if _ in compaction_state:
                    return False
            # MAJOR_AND_MINOR shows during major compaction
            if compaction_state == 'MINOR':
                return False
            if len(compaction_state.split('\n')) > 1:
                raise UnknownError('parsing error - table data next to Compaction > 1 line' + \
                                   ', old version of HBase < 0.96? Otherwise HBase UI may have changed' + \
                                   '. {0}'.format(support_msg()))
            return True

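# Minimal illustrative sketch of the table shape parse_table() above expects - the HTML below is
# made up (attribute names other than 'Compaction' and the descriptions are invented), it only
# mirrors the 'Attribute Name | Value | Description' layout the checks are written against:
from bs4 import BeautifulSoup

_sample_html = '''
<table>
  <tr><th>Attribute Name</th><th>Value</th><th>Description</th></tr>
  <tr><td>Enabled</td><td>true</td><td>Is the table enabled</td></tr>
  <tr><td>Compaction</td><td>NONE</td><td>Is the table compacting</td></tr>
</table>
'''
_table = BeautifulSoup(_sample_html, 'html.parser').find('table')
# parse_table(_table) would return False here since the Compaction state is NONE
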
def parse_output(self, content):
    soup = BeautifulSoup(content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
    # shorter to just catch NoneType attribute error when tag not found and returns None
    try:
        basestats = soup.find('div', {'id': 'tab_baseStats'})
        table = basestats.find('table')
        #for table in basestats:
        rows = table.findAll('tr')
        headers = rows[0].findAll('th')
        header_server = headers[0].get_text()
        header_regions = headers[3].get_text()
        wider_table = len(headers) > 4
        # HBase 1.1 in HDP 2.3: ServerName | Start time | Requests Per Second | Num. Regions
        # HBase 1.2 (Apache): ServerName | Start time | Version | Requests per Second | Num. Regions
        if wider_table:
            header_regions = headers[4].get_text()
        if header_server != 'ServerName':
            qquit('UNKNOWN', "Table headers in Master UI have changed" +
                  " (got {0}, expected 'ServerName'). ".format(header_server) + support_msg())
        if header_regions != 'Num. Regions':
            qquit('UNKNOWN', "Table headers in Master UI have changed" +
                  " (got {0}, expected 'Num. Regions'). ".format(header_regions) + support_msg())
        log.debug('%-50s\tnum_regions', 'server')
        for row in rows[1:]:
            # this can be something like:
            # 21689588ba40,16201,1473775984259
            # so don't apply isHost() validation because it'll fail FQDN / IP address checks
            cols = row.findAll('td')
            server = cols[0].get_text()
            if self.total_regex.match(server):
                continue
            num_regions = cols[3].get_text()
            if wider_table:
                num_regions = cols[4].get_text()
            if not isInt(num_regions):
                qquit('UNKNOWN', "parsing error - got '{0}' for num regions".format(num_regions) +
                      " for server '{0}', was expecting integer.".format(server) +
                      " UI format must have changed. " + support_msg())
            num_regions = int(num_regions)
            log.debug('%-50s\t%s', server, num_regions)
            if self.server_min_regions[1] is None or num_regions < self.server_min_regions[1]:
                self.server_min_regions = (server, num_regions)
            if self.server_max_regions[1] is None or num_regions > self.server_max_regions[1]:
                self.server_max_regions = (server, num_regions)
    except (AttributeError, TypeError, IndexError):
        qquit('UNKNOWN', 'failed to find / parse output')

def run(self):
    parquet_file = self.get_opt('parquet')
    avro_dir = self.get_opt('avro_dir')
    # let Spark fail if avro/parquet aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("Parquet Source: %s" % parquet_file)
    log.info("Avro Destination: %s" % avro_dir)
    conf = SparkConf().setAppName('HS PySpark Parquet => Avro')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    # pylint: disable=invalid-name
    if isMinVersion(spark_version, 1.4):
        # this doesn't work in Spark <= 1.3 - github docs don't mention the older .method() for writing avro
        df = sqlContext.read.parquet(parquet_file)
        df.write.format('com.databricks.spark.avro').save(avro_dir)
    else:
        die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
            'I may change this on request but prefer people just upgrade')

def run(self):
    self.no_args()
    host = self.get_opt('host')
    port = self.get_opt('port')
    validate_host(host)
    validate_port(port)
    url = 'http://%(host)s:%(port)s/status' % locals()
    log.debug('GET %s' % url)
    try:
        req = requests.get(url)
    except requests.exceptions.RequestException as _:
        qquit('CRITICAL', _)
    log.debug("response: %s %s" % (req.status_code, req.reason))
    log.debug("content:\n{0}\n{1}\n{2}".format('='*80, req.content.strip(), '='*80))
    if req.status_code != 200:
        qquit('CRITICAL', "%s %s" % (req.status_code, req.reason))
    soup = BeautifulSoup(req.content, 'html.parser')
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
    try:
        status = soup.find('div', {'class': 'alert alert-success'}).get_text().strip()
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse Apache Drill status page. %s' % support_msg())
    self.msg = "Apache Drill status = '{0}'".format(status)
    if re.match('Running!', status):
        self.ok()
    else:
        self.critical()

def parse_version(self, soup):
    version = None
    try:
        attributes_table = soup.find('table', {'id': 'attributes_table'})
        rows = attributes_table.findAll('tr')
        num_rows = len(rows)
        self.sanity_check(num_rows > 5, 'too few rows ({0})'.format(num_rows))
        headers = rows[0].findAll('th')
        num_headers = len(headers)
        self.sanity_check(num_headers > 2, 'too few header columns ({0})'.format(num_headers))
        self.sanity_check(headers[0].text.strip() == 'Attribute Name',
                          'header first column does not match expected \'Attribute Name\'')
        self.sanity_check(headers[1].text.strip() == 'Value',
                          'header second column does not match expected \'Value\'')
        for row in rows:
            cols = row.findAll('td')
            num_cols = len(cols)
            if num_cols == 0:
                continue
            self.sanity_check(num_cols > 2, 'too few columns ({0})'.format(num_cols))
            if cols[0].text.strip() == 'HBase Version':
                version = cols[1].text.split(',')[0]
                break
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to find / parse HBase output. {0}\n{1}'\
              .format(support_msg(), traceback.format_exc()))
    if version is None:
        qquit('UNKNOWN', 'failed to find HBase version in output. ' + support_msg())
    # strip things like -hadoop2 at end
    version = version.split('-')[0]
    return version

def parse(self, content):
    # could also collect lines after 'Regions-in-transition' if parsing /dump
    # sample:
    # hbase:meta,,1.1588230740 state=PENDING_OPEN, \
    # ts=Tue Nov 24 08:26:45 UTC 2015 (1098s ago), server=amb2.service.consul,16020,1448353564099
    soup = BeautifulSoup(content, 'html.parser')
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
    # looks like HMaster UI doesn't print this section if there are no regions in transition, must assume zero
    regions_stuck_in_transition = 0
    try:
        headings = soup.findAll('h2')
        for heading in headings:
            log.debug("checking heading '%s'", heading)
            if heading.get_text() == "Regions in Transition":
                log.debug('found Regions in Transition section header')
                table = heading.find_next('table')
                log.debug('checking first following table')
                regions_stuck_in_transition = self.parse_table(table)
                if not isInt(regions_stuck_in_transition):
                    qquit('UNKNOWN', 'parse error - ' +
                          'got non-integer \'{0}\' for regions stuck in transition when parsing HMaster UI'\
                          .format(regions_stuck_in_transition))
                return regions_stuck_in_transition
        #qquit('UNKNOWN', 'parse error - failed to find table data for regions stuck in transition')
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse HBase Master UI status page. ' + support_msg())
    # section not found - assume zero regions stuck in transition as per the comment above
    return regions_stuck_in_transition

def run(self):
    self.no_args()
    json_file = self.options.json
    avro_dir = self.options.avro_dir
    # let Spark fail if json/avro dir aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("Json Source: %s" % json_file)
    log.info("Avro Destination: %s" % avro_dir)
    conf = SparkConf().setAppName('HS PySpark Json => Avro')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        df = sqlContext.read.json(json_file)
    else:
        die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
            'I may change this on request but prefer people just upgrade')
        # log.warn('running legacy code for Spark <= 1.3')
        #json = sqlContext.jsonFile(json_file)
    # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
    # the databricks avro driver
    df.write.format('com.databricks.spark.avro').save(avro_dir)

def run(self):
    expected = self.get_opt('expected')
    if expected is not None:
        validate_regex(expected)
        log.info('expected version regex: %s', expected)
    cmd = 'nodetool version'
    log.debug('cmd: ' + cmd)
    proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (stdout, _) = proc.communicate()
    log.debug('stdout: ' + str(stdout))
    returncode = proc.wait()
    log.debug('returncode: ' + str(returncode))
    if returncode != 0 or (stdout is not None and 'Error' in stdout):
        raise CriticalError('nodetool returncode: {0}, output: {1}'.format(returncode, stdout))
    version = None
    for line in str(stdout).split('\n'):
        match = self.version_regex.match(line)
        if match:
            version = match.group(1)
    if not version:
        raise UnknownError('Cassandra version not found in output. Nodetool output may have changed. {0}'.
                           format(support_msg()))
    if not isVersion(version):
        raise UnknownError('Cassandra version unrecognized \'{0}\'. {1}'.format(version, support_msg()))
    self.ok()
    self.msg = 'Cassandra version = {0}'.format(version)
    if expected is not None and not re.search(expected, version):
        self.msg += " (expected '{0}')".format(expected)
        self.critical()

def parse(self, content):
    # could also collect lines after 'Regions-in-transition' if parsing /dump
    # sample:
    # hbase:meta,,1.1588230740 state=PENDING_OPEN, \
    # ts=Tue Nov 24 08:26:45 UTC 2015 (1098s ago), server=amb2.service.consul,16020,1448353564099
    soup = BeautifulSoup(content, 'html.parser')
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
    # looks like HMaster UI doesn't print this section if there are no regions in transition, must assume zero
    longest_rit_time = None
    try:
        headings = soup.findAll('h2')
        for heading in headings:
            log.debug("checking heading '%s'", heading)
            if heading.get_text() == "Regions in Transition":
                log.debug('found Regions in Transition section header')
                table = heading.find_next('table')
                log.debug('checking first following table')
                rows = table.findChildren('tr')
                header_cols = rows[0].findChildren('th')
                self.assert_headers(header_cols)
                longest_rit_time = self.process_rows(rows)
                return longest_rit_time
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse HBase Master UI status page. %s' % support_msg())

def process_rows(rows):
    longest_rit_time = None
    # will skip header anyway when it doesn't find td (will contain th instead)
    # this will avoid accidentally skipping a row later if the input changes to rows[1:] instead of rows
    #for row in rows[1:]:
    for row in rows:
        log.debug(row)
        cols = row.findChildren('td')
        # Regions in Transition rows only have 2 cols
        # <hex> region rows have Region, State, RIT time (ms)
        num_cols = len(cols)
        if num_cols == 0:
            # header row
            continue
        elif num_cols != 3:
            qquit('UNKNOWN', 'unexpected number of columns ({0}) '.format(num_cols) +
                  'for regions in transition table. ' + support_msg())
        if 'Regions in Transition' in cols[0].get_text():
            continue
        rit_time = cols[2].get_text().strip()
        if not isInt(rit_time):
            qquit('UNKNOWN', 'parsing failed, got region in transition time of ' +
                  "'{0}', expected integer".format(rit_time))
        rit_time = int(rit_time)
        if longest_rit_time is None or rit_time > longest_rit_time:
            longest_rit_time = rit_time
    return longest_rit_time

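# Minimal illustrative input for process_rows() above - region name and timing are made up,
# only the 'Region | State | RIT time (ms)' row layout the parser expects is real:
from bs4 import BeautifulSoup

_sample_rit_html = '''
<table>
  <tr><th>Region</th><th>State</th><th>RIT time (ms)</th></tr>
  <tr><td>1588230740</td><td>PENDING_OPEN</td><td>1098</td></tr>
</table>
'''
_rows = BeautifulSoup(_sample_rit_html, 'html.parser').findChildren('tr')
# process_rows(_rows) would return 1098: the header row yields no <td> cells and is skipped,
# and the region row's third cell parses as an integer and becomes the longest RIT time
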
def get_rack_info(self):
    rack_regex = re.compile(r'^Rack:\s+(.+?)\s*$')
    node_regex = re.compile(r'^\s+({ip})(?::\d+)?\s+\(({host})\)\s*$'.format(ip=ip_regex, host=host_regex))
    #node_regex = re.compile(r'^\s+(.*?).*\s+\((.*?)\)\s*'.format(ip=ip_regex))
    start = time.time()
    cmd = 'hdfs dfsadmin -printTopology'
    log.debug('cmd: ' + cmd)
    proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    (stdout, _) = proc.communicate()
    self.query_time = time.time() - start
    log.debug('stdout: ' + str(stdout))
    returncode = proc.wait()
    log.debug('returncode: ' + str(returncode))
    if returncode != 0 or (stdout is not None and 'Error' in stdout):
        raise CriticalError('hdfs command returncode: {0}, output: {1}'.format(returncode, stdout))
    lines = str(stdout).split('\n')
    racks = {}
    rack = None
    for line in lines:
        match = rack_regex.match(line)
        if match:
            rack = match.group(1)
            log.info('found rack: %s', rack)
            continue
        # ignore early warning lines sometimes output by JVM
        # only continue from point where we find at least first Rack definition
        if not rack:
            continue
        match = node_regex.match(line)
        if match:
            #ip = match.group(1)
            host = match.group(2)
            log.info('found host: %s', host)
            if not rack:
                raise UnknownError('node regex matched before rack was detected!! {}'.format(support_msg()))
            if rack not in racks:
                racks[rack] = []
            racks[rack].append(host)
        elif not line:
            continue
        else:
            raise UnknownError('parsing error. {}'.format(support_msg()))
    if not rack:
        raise UnknownError('no rack information found - parse error. {}'.format(support_msg()))
    return racks

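# Purely illustrative (rack name, IPs and hostnames below are invented): the two regexes in
# get_rack_info() above are written against 'hdfs dfsadmin -printTopology' output shaped roughly like
#
#   Rack: /default-rack
#      192.168.99.1:50010 (datanode1.local)
#      192.168.99.2:50010 (datanode2.local)
#
# for which the method would return:
_expected_racks = {'/default-rack': ['datanode1.local', 'datanode2.local']}
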
def parse(self, req):
    soup = BeautifulSoup(req.content, 'html.parser')
    dead_workers = 0
    try:
        log.info('parsing %s page for number of dead workers', self.path)
        dead_workers = len([_ for _ in soup.find(id='data2').find('tbody').find_all('tr') if _])
    except (AttributeError, TypeError):
        raise UnknownError('failed to parse {0} Master info for dead workers. UI may have changed. {1}'.
                           format(self.software, support_msg()))
    try:
        dead_workers = int(dead_workers)
    except (ValueError, TypeError):
        raise UnknownError('{0} Master dead workers parsing returned non-integer: {1}. UI may have changed. {2}'.
                           format(self.software, dead_workers, support_msg()))
    self.msg = '{0} dead workers = {1}'.format(self.software, dead_workers)
    self.check_thresholds(dead_workers)
    self.msg += ' | '
    self.msg += 'dead_workers={0}{1}'.format(dead_workers, self.get_perf_thresholds())

def parse_table(table):
    """
    Take a BeautifulSoup table as argument and parse it for compaction information.
    Return True if compacting or False otherwise
    """
    log.debug('checking first following table')
    if log.isEnabledFor(logging.DEBUG):
        log.debug('table:\n%s\n%s', table.prettify(), '='*80)
    rows = table.findChildren('tr')
    if len(rows) < 3:
        qquit('UNKNOWN', 'parse error - less than the 3 expected rows in table attributes')
    col_names = rows[0].findChildren('th')
    if len(col_names) < 3:
        qquit('UNKNOWN', 'parse error - less than the 3 expected column headings')
    first_col = col_names[0].get_text().strip()
    if first_col != 'Attribute Name':
        qquit('UNKNOWN',
              'parse error - expected first column header to be \'{0}\' but got \'{1}\' instead. '\
              .format('Attribute Name', first_col) + support_msg())
    # ===========
    # fix for older versions of HBase < 1.0 that do not populate the table properly
    # if table does not exist
    found_compaction = False
    for row in rows[1:]:
        cols = row.findChildren('td')
        if cols[0].get_text().strip() == 'Compaction':
            found_compaction = True
    if not found_compaction:
        qquit('CRITICAL', 'Compaction table attribute not found, perhaps table does not exist?')
    # ===========
    for row in rows[1:]:
        cols = row.findChildren('td')
        if len(cols) < 3:
            qquit('UNKNOWN', 'parse error - less than the 3 expected columns in table attributes: ' + \
                  '{0}. {1}'.format(cols, support_msg()))
        if cols[0].get_text().strip() == 'Compaction':
            compaction_state = cols[1].get_text().strip()
            # NONE when enabled, Unknown when disabled
            for _ in ('NONE', 'Unknown'):
                if _ in compaction_state:
                    return False
            return True

def parse_is_table_compacting(content):
    soup = BeautifulSoup(content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
    try:
        headings = soup.findAll('h2')
        for heading in headings:
            log.debug("checking heading '%s'", heading)
            if heading.get_text() == 'Table Attributes':
                log.debug('found Table Attributes section header')
                table = heading.find_next('table')
                log.debug('checking first following table')
                if log.isEnabledFor(logging.DEBUG):
                    log.debug('table:\n%s\n%s', table.prettify(), '='*80)
                rows = table.findChildren('tr')
                if len(rows) < 3:
                    qquit('UNKNOWN', 'parse error - less than the 3 expected rows in table attributes')
                col_names = rows[0].findChildren('th')
                if len(col_names) < 3:
                    qquit('UNKNOWN', 'parse error - less than the 3 expected column headings')
                first_col = col_names[0].get_text().strip()
                if first_col != 'Attribute Name':
                    qquit('UNKNOWN',
                          'parse error - expected first column header to be \'{0}\' but got \'{1}\' instead. '\
                          .format('Attribute Name', first_col) + support_msg())
                for row in rows[1:]:
                    cols = row.findChildren('td')
                    if len(cols) < 3:
                        qquit('UNKNOWN', 'parse error - less than the 3 expected columns in table attributes. '
                              + support_msg())
                    if cols[0].get_text().strip() == 'Compaction':
                        compaction_state = cols[1].get_text().strip()
                        # NONE when enabled, Unknown when disabled
                        if compaction_state in ('NONE', 'Unknown'):
                            return False
                        else:
                            return True
        qquit('UNKNOWN', 'parse error - failed to find Table Attributes section in JSP. ' + support_msg())
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse output. ' + support_msg())

def parse(self, req):
    soup = BeautifulSoup(req.content, 'html.parser')
    #if log.isEnabledFor(logging.DEBUG):
    #    log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
    # this masks underlying exception
    #try:
    tab = soup.find('div', {'id': 'tab_baseStats'})
    table = tab.find_next('table')
    rows = table.findChildren('tr')
    if len(rows) < 2:
        raise UnknownError('no regionserver rows found in base stats table! {}'.format(support_msg()))
    # HBase 1.1 in HDP 2.3: ServerName | Start time | Requests Per Second | Num. Regions
    # HBase 1.2 (Apache): ServerName | Start time | Version | Requests per Second | Num. Regions
    # HBase 1.4 (Apache): ServerName | Start time | Last Contact | Version | Requests Per Second | Num. Regions
    th_list = rows[0].findChildren('th')
    if len(th_list) < 4:
        raise UnknownError('no table header for base stats table!')
    expected_header = 'Requests Per Second'
    col_index = len(th_list) - 2
    found_header = th_list[col_index].text
    if found_header != expected_header:
        raise UnknownError("wrong table header found for column {}! Expected '{}' but got '{}'. {}"\
                           .format(col_index + 1, expected_header, found_header, support_msg()))
    stats = {}
    for row in rows[1:]:
        cols = row.findChildren('td')
        if len(cols) < 4:
            raise UnknownError('4th column in table not found! {}'.format(support_msg()))
        regionserver = cols[0].text.strip().split(',')[0]
        if 'Total:' in regionserver:
            break
        reqs_per_sec = cols[col_index].text.strip()
        if not isInt(reqs_per_sec):
            raise UnknownError("non-integer found in Requests Per Second column for regionserver '{}'. {}"\
                               .format(regionserver, support_msg()))
        # fix for this is to cast string '1.0' to float and then cast to int
        # ValueError: invalid literal for int() with base 10: '1.0'
        stats[regionserver] = int(float(reqs_per_sec))
    self.process_stats(stats)

def parse(self, req):
    soup = BeautifulSoup(req.content, 'html.parser')
    # if log.isEnabledFor(logging.DEBUG):
    #     log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
    status = None
    try:
        status = soup.find('div', {'class': 'alert alert-success'}).get_text().strip()
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse Apache Drill status page. %s' % support_msg())
    if re.match('Running!?$', status):
        self.ok()
    else:
        self.critical()
    return status

def check_id(self, docker_image_line):
    #_id = output[1][name_len + 10:name_len + 10 + 20].strip()
    _id = docker_image_line.split()[2]
    log.debug('id: %s', _id)
    self.msg += ", id = '{id}'".format(id=_id)
    if self.expected_id:
        log.debug('checking expected --id')
        if not re.match(r'(sha\d+:)?\w+', _id):
            raise UnknownError("{msg} not in sha format as expected! {support}"\
                               .format(msg=self.msg, support=support_msg()))
        if _id != self.expected_id:
            self.critical()
            self.msg += " (expected id = '{0}')".format(self.expected_id)
    return _id

def parse(self, req): soup = BeautifulSoup(req.content, 'html.parser') if log.isEnabledFor(logging.DEBUG): log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80)) version = None try: _ = soup.find('th', {'scope': 'row'}) if _.text.strip() == 'Version': version = _.find_next_sibling('td').text except (AttributeError, TypeError): raise UnknownError('failed to parse output. {}'.format(support_msg())) if not version: raise UnknownError('failed to retrieve version') return version
def parse_is_table_compacting(self, content):
    soup = BeautifulSoup(content, 'html.parser')
    if log.isEnabledFor(logging.DEBUG):
        log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
    try:
        headings = soup.findAll('h2')
        for heading in headings:
            log.debug("checking heading '%s'", heading)
            if heading.get_text() == 'Table Attributes':
                log.debug('found Table Attributes section header')
                table = heading.find_next('table')
                return self.parse_table(table)
        qquit('UNKNOWN', 'parse error - failed to find Table Attributes section in JSP. ' + support_msg())
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse output. ' + support_msg())

def check_size(self, docker_image_line):
    match = re.search(r'(\d+(?:\.\d+)?)\s*([KMG]B)\s*$', docker_image_line)
    if match:
        size = match.group(1)
        units = match.group(2).strip()
        log.debug("size: %s", size)
        log.debug("units: %s", units)
        size_in_bytes = expand_units(size, units)
        log.debug("size in bytes: %s", size_in_bytes)
    else:
        raise UnknownError('failed to parse size. {0}'.format(support_msg()))
    self.msg += ", size = {size} {units}".format(size=size, units=units)
    log.debug('checking size %s against thresholds', size_in_bytes)
    self.check_thresholds(size_in_bytes)
    return size_in_bytes

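# Hypothetical 'docker images' output line (repository, tag and image id are made up) showing what
# check_id() and check_size() above parse - the third whitespace-separated field is taken as the
# image id, and the trailing size field is captured and expanded to bytes:
_docker_image_line = 'myrepo/myimage   latest   3f8a4339aadd   2 weeks ago   188.3 MB'
# here the size regex would capture size='188.3' and units='MB', and expand_units()
# (from the same helper library as support_msg()) is assumed to convert that to bytes
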
def parse(self, req):
    soup = BeautifulSoup(req.content, 'html.parser')
    # if log.isEnabledFor(logging.DEBUG):
    #     log.debug("BeautifulSoup prettified:\n%s\n%s", soup.prettify(), '='*80)
    status = None
    try:
        status = soup.find('div', {'class': 'alert alert-success'}).get_text().strip()
    except (AttributeError, TypeError):
        qquit('UNKNOWN', 'failed to parse Apache Drill status page. %s' % support_msg())
    # Found a STARTUP status in cluster nodes state, but looking at the code for /status it looks like
    # Running is all there is, or results for this endpoint are not properly documented
    # - see https://issues.apache.org/jira/browse/DRILL-6407
    #if status in ("Startup", "Initializing"):
    #    self.warning()
    if re.match('^Running!?$', status):
        self.ok()
    else:
        self.critical()
    return status

def main(self):
    try:
        # Python 2.x
        super(NagiosPlugin, self).main()
        # Python 3.x
        # super().main()
        # redirect_stderr_stdout()
    except CriticalError as _:
        qquit('CRITICAL', _)
    except WarningError as _:
        qquit('WARNING', _)
    except UnknownError as _:
        qquit('UNKNOWN', _)
    except CodingError as _:
        qquit('UNKNOWN', 'Programming Error: {0}. {1}'.format(_, support_msg()))
    except Exception as _:  # pylint: disable=broad-except
        qquit('UNKNOWN', _)

def parse(self, req):
    soup = BeautifulSoup(req.content, 'html.parser')
    last_heartbeat = None
    try:
        self.list_workers(soup)
        heartbeat_col_header = soup.find('th', text='Node Name').find_next_sibling().get_text()
        # make sure ordering of columns is as we expect so we're parsing the correct number for heartbeat lag
        assert heartbeat_col_header == 'Last Heartbeat'
        last_heartbeat = soup.find('th', text=self.node).find_next_sibling().get_text()
        if last_heartbeat is None:
            raise AttributeError
    except (AttributeError, TypeError):
        raise CriticalError("{0} worker '{1}' not found among list of live workers!"\
                            .format(self.software, self.node))
    if not isInt(last_heartbeat):
        raise UnknownError("last heartbeat '{0}' for node '{1}' is not an integer, possible parsing error! {2}"\
                           .format(last_heartbeat, self.node, support_msg()))
    self.msg = "{0} worker '{1}' last heartbeat = {2} secs ago".format(self.software, self.node, last_heartbeat)
    self.check_thresholds(last_heartbeat)
    self.msg += ' | last_heartbeat={0}s{1}'.format(last_heartbeat, self.get_perf_thresholds())

def run(self):
    json_file = self.get_opt('json')
    parquet_dir = self.get_opt('parquet_dir')
    # let Spark fail if json/parquet aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("Json Source: %s" % json_file)
    log.info("Parquet Destination: %s" % parquet_dir)
    conf = SparkConf().setAppName('HS PySpark JSON => Parquet')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))
    if isMinVersion(spark_version, 1.4):
        df = sqlContext.read.json(json_file)  # pylint: disable=invalid-name
        df.write.parquet(parquet_dir)
    else:
        log.warn('running legacy code for Spark <= 1.3')
        df = sqlContext.jsonFile(json_file)  # pylint: disable=invalid-name
        df.saveAsParquetFile(parquet_dir)

def sanity_check(condition, msg):
    if not condition:
        qquit('UNKNOWN', 'HBase attribute table header ' + msg + ', failed sanity check! ' + support_msg())

def run(self):
    csv_file = self.get_opt('csv')
    avro_dir = self.get_opt('avro_dir')
    has_header = self.get_opt('has_header')
    # I don't know why the Spark guys made this a string instead of a bool
    header_str = 'false'
    if has_header:
        header_str = 'true'
    schema = self.get_opt('schema')
    # let Spark fail if csv/avro dir aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("CSV Source: %s" % csv_file)
    log.info("Avro Destination: %s" % avro_dir)

    if schema:
        def get_type(arg):
            arg = str(arg).lower()
            if arg not in self.types_mapping:
                self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                           % (arg, ', '.join(sorted(self.types_mapping.keys()))))
            # return self.types_mapping[arg]
            module = __import__('pyspark.sql.types', globals(), locals(), ['types'], -1)
            class_ = getattr(module, self.types_mapping[arg])
            _ = class_()
            return _

        def create_struct(arg):
            name = str(arg).strip()
            data_type = 'string'
            if ':' in arg:
                (name, data_type) = arg.split(':', 1)
            data_class = get_type(data_type)
            return StructField(name, data_class, True)

        # see https://github.com/databricks/spark-csv#python-api
        self.schema = StructType([create_struct(_) for _ in schema.split(',')])
        log.info('generated CSV => Spark schema')

    conf = SparkConf().setAppName('HS PySpark CSV => Avro')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))

    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.read.format('com.databricks.spark.csv')\
                 .options(header=header_str, inferschema='true')\
                 .load(csv_file)
        else:
            log.info('using explicitly defined schema')
            schema = self.schema
            df = sqlContext.read\
                 .format('com.databricks.spark.csv')\
                 .options(header=header_str)\
                 .load(csv_file, schema=schema)
    else:
        die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
            'I may change this on request but prefer people just upgrade')
        # log.warn('running legacy code for Spark <= 1.3')
        # if has_header and not schema:
        #     log.info('inferring schema from CSV headers')
        #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
        #                          header=header_str, inferSchema='true')
        # elif self.schema:
        #     log.info('using explicitly defined schema')
        #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
        #                          header=header_str, schema=self.schema)
        # else:
        #     die('no header and no schema, caught late')
    # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
    # the databricks avro driver
    df.write.format('com.databricks.spark.avro').save(avro_dir)

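# Illustrative only: the field names below are made up and the exact contents of self.types_mapping
# are assumed (e.g. 'int' -> 'IntegerType', 'double' -> 'DoubleType'). Given
# --schema 'name,age:int,balance:double', create_struct() above would build roughly this Spark schema:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType

_expected_schema = StructType([
    StructField('name', StringType(), True),     # no ':type' suffix defaults to string
    StructField('age', IntegerType(), True),
    StructField('balance', DoubleType(), True),
])
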
def parse_error(msg):
    qquit('UNKNOWN', 'parse error - ' + msg + '. ' + support_msg())

def sanity_check(condition, msg):
    if not condition:
        qquit(
            'UNKNOWN',
            'HBase attribute table header ' + msg + ', failed sanity check! ' + support_msg())

def parse(self, req): soup = BeautifulSoup(req.content, 'html.parser') if log.isEnabledFor(logging.DEBUG): log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80)) version = None try: _ = soup.find('span', {'class': 'jenkins_ver'}) log.debug('found span containing jenkins_ver') if _: version = _.text.strip() except (AttributeError, TypeError): raise UnknownError('failed to parse output') if not version: raise UnknownError('failed to retrieve version') log.debug('extracting version for Jenkins version string: %s', version) _ = re.match(r'Jenkins ver\. ({0})'.format(version_regex), version) if not _: raise UnknownError('failed to parse version string, format may have changed. {0}'.format(support_msg())) version = _.group(1) return version
def run(self):
    csv_file = self.options.csv
    avro_dir = self.options.avro_dir
    has_header = self.options.has_header
    # I don't know why the Spark guys made this a string instead of a bool
    header_str = 'false'
    if has_header:
        header_str = 'true'
    schema = self.options.schema
    # let Spark fail if csv/avro dir aren't available
    # can't check paths exist as want to remain generically portable
    # to HDFS, local filesystem or any other uri scheme Spark supports
    log.info("CSV Source: %s" % csv_file)
    log.info("Avro Destination: %s" % avro_dir)

    if schema:
        def get_type(arg):
            arg = str(arg).lower()
            if arg not in self.types_mapping:
                self.usage("invalid type '%s' defined in --schema, must be one of: %s"
                           % (arg, ', '.join(sorted(self.types_mapping.keys()))))
            # return self.types_mapping[arg]
            module = __import__('pyspark.sql.types', globals(), locals(), ['types'], -1)
            class_ = getattr(module, self.types_mapping[arg])
            _ = class_()
            return _

        def create_struct(arg):
            name = arg
            data_type = 'string'
            if ':' in arg:
                (name, data_type) = arg.split(':', 1)
            data_class = get_type(data_type)
            return StructField(name, data_class, True)

        # see https://github.com/databricks/spark-csv#python-api
        self.schema = StructType([create_struct(_) for _ in schema.split(',')])
        log.info('generated CSV => Spark schema')

    conf = SparkConf().setAppName('HS PySpark CSV => Avro')
    sc = SparkContext(conf=conf)  # pylint: disable=invalid-name
    sqlContext = SQLContext(sc)  # pylint: disable=invalid-name
    spark_version = sc.version
    log.info('Spark version detected as %s' % spark_version)
    if not isVersionLax(spark_version):
        die("Spark version couldn't be determined. " + support_msg('pytools'))

    # pylint: disable=invalid-name
    df = None
    if isMinVersion(spark_version, 1.4):
        if has_header and not schema:
            log.info('inferring schema from CSV headers')
            df = sqlContext.read.format('com.databricks.spark.csv')\
                 .options(header=header_str, inferschema='true')\
                 .load(csv_file)
        else:
            log.info('using explicitly defined schema')
            schema = self.schema
            df = sqlContext.read\
                 .format('com.databricks.spark.csv')\
                 .options(header=header_str)\
                 .load(csv_file, schema=schema)
    else:
        die('Spark <= 1.3 is not supported due to avro dependency, sorry! ' + \
            'I may change this on request but prefer people just upgrade')
        # log.warn('running legacy code for Spark <= 1.3')
        # if has_header and not schema:
        #     log.info('inferring schema from CSV headers')
        #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
        #                          header=header_str, inferSchema='true')
        # elif self.schema:
        #     log.info('using explicitly defined schema')
        #     df = sqlContext.load(source="com.databricks.spark.csv", path=csv_file,
        #                          header=header_str, schema=self.schema)
        # else:
        #     die('no header and no schema, caught late')
    # this doesn't work in Spark <= 1.3 and the github docs don't mention the older methods for writing avro using
    # the databricks avro driver
    df.write.format('com.databricks.spark.avro').save(avro_dir)

def parse(self, req): soup = BeautifulSoup(req.content, 'html.parser') if log.isEnabledFor(logging.DEBUG): log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80)) version = None try: _ = soup.find('span', {'class': 'jenkins_ver'}) log.debug('found span containing jenkins_ver') if _: version = _.text.strip() except (AttributeError, TypeError): raise UnknownError('failed to parse output') if not version: raise UnknownError('failed to retrieve version') log.debug('extracting version for Jenkins version string: %s', version) _ = re.match(r'Jenkins ver\. ({0})'.format(version_regex), str(version)) if not _: raise UnknownError('failed to parse version string, format may have changed. {0}'.format(support_msg())) version = _.group(1) return version
def run(self): expected = self.get_opt("expected") if expected is not None: validate_regex(expected) log.info("expected version regex: %s", expected) cmd = "consul version" log.debug("cmd: " + cmd) proc = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (stdout, _) = proc.communicate() log.debug("stdout: " + str(stdout)) returncode = proc.wait() log.debug("returncode: " + str(returncode)) if returncode != 0 or (stdout is not None and "Error" in stdout): raise CriticalError("consul returncode: {0}, output: {1}".format(returncode, stdout)) version = None for line in str(stdout).split("\n"): match = self.version_regex.match(line) if match: version = match.group(1) if not version: raise UnknownError( "Consul version not found in output. Consul output may have changed. {0}".format(support_msg()) ) if not isVersion(version): raise UnknownError("Consul version unrecognized '{0}'. {1}".format(version, support_msg())) self.ok() self.msg = "Consul version = {0}".format(version) if expected is not None and not re.search(expected, version): self.msg += " (expected '{0}')".format(expected) self.critical()