コード例 #1
0
 def output(self, connect_time, total_time):
     """Build self.msg and trailing perfdata for the HBase write-spray results.

     Reports total/connect times plus the slowest write/read/delete time per
     column family, checking each timing against the thresholds.
     """
     cf_word = 'families' if plural(self.num_column_families) else 'family'
     self.msg = "HBase write spray to {0} column {1} x {2} region{3}".format(
         self.num_column_families, cf_word, self.num_regions, plural(self.num_regions))
     precision = self.precision
     self.msg += " total_time={0:0.{precision}f}ms".format(total_time, precision=precision)
     self.msg += " connect_time={connect_time:0.{precision}f}ms".format(connect_time=connect_time,
                                                                        precision=precision)
     perfdata = " | total_time={total_time:0.{precision}f}ms connect_time={connect_time:0.{precision}f}ms"\
                .format(total_time=total_time, connect_time=connect_time, precision=precision)
     self.msg += ", max timings: column family "
     # one entry per column family key ('cf:qualifier'), reporting each action's max timing
     for col_key in self.timings:
         column = col_key.split(':', 2)[0]
         self.msg += "'{0}'".format(column)
         for action in ('write', 'read', 'delete'):
             query_time = self.timings[col_key][action]
             self.msg += " {0}_time={1:0.{precision}f}ms".format(action, query_time,
                                                                 precision=precision)
             self.check_thresholds(query_time)
             perfdata += " '{0}_max_{1}_time'={2:0.{precision}f}ms".format(column, action, query_time,
                                                                          precision=precision)
             perfdata += self.get_perf_thresholds()
         self.msg += ', '
     # drop the trailing ', ' left by the loop before appending perfdata
     self.msg = self.msg.rstrip(', ')
     self.msg += perfdata
コード例 #2
0
 def run(self):
     """Check /proc/mounts for read-only mount points (Linux only).

     CRITICAL if any read-only mount point is found, WARNING if no mount
     points matched the filters, OK otherwise. Appends count perfdata.
     """
     try:
         linux_only(' as it reads /proc/mounts for more reliable information than the mount command provides' + \
                    ', see --help description for more details')
     except LinuxOnlyException as _:
         raise UnknownError('LinuxOnlyException: {}'.format(_))
     mount_lines = self.get_mounts()
     (num_read_only, num_checked, read_only) = self.parse_mounts(mount_lines)
     self.msg = '{} read only mount point{} out of {} mount point{} checked'\
                .format(num_read_only, plural(num_read_only), num_checked, plural(num_checked))
     # escalating order: ok -> warning (nothing matched) -> critical (read-only found)
     if num_read_only == 0:
         self.ok()
     if num_checked == 0:
         self.warning()
         self.msg += ' (no matching mount points?)'
     if num_read_only > 0:
         self.critical()
         self.msg += '!'
         if self.verbose:
             from pprint import pprint
             pprint(read_only)
             # fixed: .items() instead of Python-2-only .iteritems() so this
             # branch doesn't AttributeError under Python 3
             if self.verbose > 1:
                 _ = ['{}({})'.format(mount_point, _type) for mount_point, _type in read_only.items()]
             else:
                 _ = [mount_point for mount_point, _type in read_only.items()]
             self.msg += ' [{}]'.format(', '.join(_))
     self.msg += ' | read_only_mount_points={} mount_points_checked={}'.format(num_read_only, num_checked)
 def parse_json(self, json_data):
     """Parse the Ambari hosts API response and report rack distribution.

     Warns when fewer than 2 racks are configured or when any node is still
     in '/default-rack'; appends rack/node perfdata.
     """
     if self.list:
         # list mode: just print the cluster names and exit
         print('Ambari Clusters:\n')
         for item in json_data['items']:
             print(item['Clusters']['cluster_name'])
         sys.exit(ERRORS['UNKNOWN'])
     # group host names by their rack assignment
     racks = {}
     for host in json_data['items']:
         host_info = host['Hosts']
         racks.setdefault(host_info['rack_info'], []).append(host_info['host_name'])
     num_racks = len(racks)
     self.msg = '{} rack{} configured'.format(num_racks, plural(num_racks))
     if num_racks < 2:
         self.warning()
         self.msg += ' (no rack resilience!)'
     default_rack = '/default-rack'
     num_nodes_left_in_default_rack = 0
     if default_rack in racks:
         # nodes never assigned a real rack - flag and prepend to the message
         self.warning()
         num_nodes_left_in_default_rack = len(racks[default_rack])
         msg = "{num} node{plural} left in '{default_rack}'!"\
               .format(num=num_nodes_left_in_default_rack,
                       plural=plural(num_nodes_left_in_default_rack),
                       default_rack=default_rack)
         if self.verbose:
             msg += ' [{}]'.format(', '.join(racks[default_rack]))
         self.msg = msg + ' - ' + self.msg
     self.msg += ' | hdfs_racks={};2 nodes_in_default_rack={};0'\
                 .format(num_racks, num_nodes_left_in_default_rack)
コード例 #4
0
    def run(self):
        """Connect to Jenkins, count offline build nodes and check thresholds.

        Raises CriticalError on any Jenkins API failure; appends node counts
        and query time as perfdata.
        """
        server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
        try:
            log.debug('setting up Jenkins connection to %s', server_url)
            start_time = time.time()
            server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
            if log.isEnabledFor(logging.DEBUG):
                log.debug('getting user')
                user = server.get_whoami()
                log.debug('connected as user %s', jsonpp(user))
            log.debug('getting Jenkins nodes')
            nodes = server.get_nodes()
            log.debug('nodes: %s', nodes)
            node_count = len(nodes)
            log.debug('node count: %s', node_count)
            # count nodes the master reports as offline
            offline_nodes = sum(1 for node in nodes if node['offline'])
            self.msg += '{0} offline node{1}'.format(offline_nodes, plural(offline_nodes))
            self.check_thresholds(offline_nodes)
            self.msg += ' out of {0} node{1}'.format(node_count, plural(node_count))
        except jenkins.JenkinsException as _:
            raise CriticalError(_)

        query_time = time.time() - start_time
        self.msg += ' | offline_nodes={0:d}'.format(offline_nodes)
        self.msg += self.get_perf_thresholds()
        self.msg += ' node_count={0:d}'.format(node_count)
        self.msg += ' query_time={0:.4f}s'.format(query_time)
 def parse_json(self, json_data):
     """Parse the Ambari hosts API response and report rack distribution.

     Warns when fewer than 2 racks are configured or when any node is still
     in '/default-rack'; appends rack/node perfdata.
     """
     if self.list:
         # list mode: just print the cluster names and exit
         print('Ambari Clusters:\n')
         for item in json_data['items']:
             print(item['Clusters']['cluster_name'])
         sys.exit(ERRORS['UNKNOWN'])
     # group host names by their rack assignment
     racks = {}
     for host in json_data['items']:
         host_info = host['Hosts']
         racks.setdefault(host_info['rack_info'], []).append(host_info['host_name'])
     num_racks = len(racks)
     self.msg = '{} rack{} configured'.format(num_racks, plural(num_racks))
     if num_racks < 2:
         self.warning()
         self.msg += ' (no rack resilience!)'
     default_rack = '/default-rack'
     num_nodes_left_in_default_rack = 0
     if default_rack in racks:
         # nodes never assigned a real rack - flag and prepend to the message
         self.warning()
         num_nodes_left_in_default_rack = len(racks[default_rack])
         msg = "{num} node{plural} left in '{default_rack}'!"\
               .format(num=num_nodes_left_in_default_rack,
                       plural=plural(num_nodes_left_in_default_rack),
                       default_rack=default_rack)
         if self.verbose:
             msg += ' [{}]'.format(', '.join(racks[default_rack]))
         self.msg = msg + ' - ' + self.msg
     self.msg += ' | hdfs_racks={};2 nodes_in_default_rack={};0'\
                 .format(num_racks, num_nodes_left_in_default_rack)
コード例 #6
0
 def output(self, connect_time, total_time):
     """Build self.msg and trailing perfdata for the HBase write-spray results.

     Reports total/connect times plus the slowest write/read/delete time per
     column family, checking each timing against the thresholds.
     """
     cf_word = 'families' if plural(self.num_column_families) else 'family'
     self.msg = "HBase write spray to {0} column {1} x {2} region{3}".format(
         self.num_column_families, cf_word, self.num_regions, plural(self.num_regions))
     precision = self.precision
     self.msg += " total_time={0:0.{precision}f}ms".format(total_time, precision=precision)
     self.msg += " connect_time={connect_time:0.{precision}f}ms".format(connect_time=connect_time,
                                                                        precision=precision)
     perfdata = " | total_time={total_time:0.{precision}f}ms connect_time={connect_time:0.{precision}f}ms"\
                .format(total_time=total_time, connect_time=connect_time, precision=precision)
     self.msg += ", max timings: column family "
     # one entry per column family key ('cf:qualifier'), reporting each action's max timing
     for col_key in self.timings:
         column = col_key.split(':', 2)[0]
         self.msg += "'{0}'".format(column)
         for action in ('write', 'read', 'delete'):
             query_time = self.timings[col_key][action]
             self.msg += " {0}_time={1:0.{precision}f}ms".format(action, query_time,
                                                                 precision=precision)
             self.check_thresholds(query_time)
             perfdata += " '{0}_max_{1}_time'={2:0.{precision}f}ms".format(column, action, query_time,
                                                                          precision=precision)
             perfdata += self.get_perf_thresholds()
         self.msg += ', '
     # drop the trailing ', ' left by the loop before appending perfdata
     self.msg = self.msg.rstrip(', ')
     self.msg += perfdata
コード例 #7
0
 def parse_json(self, json_data):
     """Parse the NameNode JMX NameDirStatuses bean and report failed/active dirs.

     WARNING on any failed dir, CRITICAL when no active dirs remain;
     appends dir counts as perfdata.
     """
     log.info('parsing response')
     try:
         # NameDirStatuses is itself a JSON string embedded in the bean
         name_dir_data = json.loads(json_data['beans'][0]['NameDirStatuses'])
         active_dirs = name_dir_data['active']
         failed_dirs = name_dir_data['failed']
         num_active_dirs = len(active_dirs)
         num_failed_dirs = len(failed_dirs)
         self.msg = 'NameNode has {0} failed dir{1}'.format(num_failed_dirs, plural(num_failed_dirs))
         if num_failed_dirs > 0:
             self.warning()
             if self.verbose:
                 self.msg += ' ({0})'.format(', '.join(failed_dirs))
         self.msg += ', {0} active dir{1}'.format(num_active_dirs, plural(num_active_dirs))
         if num_active_dirs < 1:
             self.critical()
         if self.verbose and num_active_dirs > 0:
             self.msg += ' ({0})'.format(', '.join(active_dirs))
         self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(num_failed_dirs, num_active_dirs)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
コード例 #8
0
 def check_times(self, start_date, end_date, max_age, max_runtime):
     """Check job runtime and time-since-start against thresholds.

     :param start_date: start time string in '%m/%d/%Y %H:%M:%S' format
     :param end_date: end time string in the same format
     :param max_age: warn if the last run started more than this many minutes ago (or None)
     :param max_runtime: warn if the run took longer than this many minutes (or None)
     """
     try:
         start_datetime = datetime.strptime(start_date, '%m/%d/%Y %H:%M:%S')
         end_datetime = datetime.strptime(end_date, '%m/%d/%Y %H:%M:%S')
     except ValueError as _:
         qquit('UNKNOWN', 'error parsing date time format: {0}'.format(_))
     runtime_delta = end_datetime - start_datetime
     self.msg += ' in {0}'.format(sec2human(runtime_delta.seconds))
     # fixed: original tested 'max_runtime > runtime/3600.0', which both inverted the
     # comparison and used hours, while the threshold and message are in minutes
     if max_runtime is not None and (runtime_delta.seconds / 60.0) > max_runtime:
         self.warning()
         # fixed: rstrip('.0') strips a character *set*, mangling '10' -> '1';
         # strip trailing zeros then the dot instead
         self.msg += ' (greater than {0} min{1}!)'.format(str(max_runtime).rstrip('0').rstrip('.'),
                                                          plural(max_runtime))
     age_timedelta = datetime.now() - start_datetime
     if self.verbose:
         self.msg += ", start date = '{startdate}', end date = '{enddate}'".\
                     format(startdate=start_date, enddate=end_date)
         self.msg += ', started {0} ago'.format(sec2human(age_timedelta.seconds))
     if max_age is not None and age_timedelta.seconds > (max_age * 60.0):
         self.warning()
         self.msg += ' (last run started more than {0} min{1} ago!)'.format(str(max_age).rstrip('0').rstrip('.'),
                                                                            plural(max_age))
     self.msg += ' |'
     # fixed: perfdata values are in seconds and thresholds are minutes, so
     # minutes * 60 (was * 3600, inconsistent with the * 60.0 checks above)
     self.msg += ' runtime={0}s;{1}'.format(runtime_delta.seconds, max_runtime * 60 if max_runtime else '')
     self.msg += ' age={0}s;{1}'.format(age_timedelta.seconds, max_age * 60 if max_age else '')
     self.msg += ' auth_time={auth_time}s query_time={query_time}s'.format(auth_time=self.auth_time,
                                                                           query_time=self.query_time)
 def parse_json(self, json_data):
     """Parse the NameNode JMX NameDirStatuses bean and report failed/active dirs.

     WARNING on any failed dir, CRITICAL when no active dirs remain;
     appends dir counts as perfdata.
     """
     log.info('parsing response')
     try:
         # NameDirStatuses is itself a JSON string embedded in the bean
         name_dir_data = json.loads(json_data['beans'][0]['NameDirStatuses'])
         active_dirs = name_dir_data['active']
         failed_dirs = name_dir_data['failed']
         num_active_dirs = len(active_dirs)
         num_failed_dirs = len(failed_dirs)
         self.msg = 'NameNode has {0} failed dir{1}'.format(num_failed_dirs, plural(num_failed_dirs))
         if num_failed_dirs > 0:
             self.warning()
             if self.verbose:
                 self.msg += ' ({0})'.format(', '.join(failed_dirs))
         self.msg += ', {0} active dir{1}'.format(num_active_dirs, plural(num_active_dirs))
         if num_active_dirs < 1:
             self.critical()
         if self.verbose and num_active_dirs > 0:
             self.msg += ' ({0})'.format(', '.join(active_dirs))
         self.msg += ' | num_failed_dirs={0} num_active_dirs={1}'.format(num_failed_dirs, num_active_dirs)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for NameDirStatuses by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
コード例 #10
0
 def run(self):
     """Check /proc/mounts for read-only mount points (Linux only).

     CRITICAL if any read-only mount point is found, WARNING if no mount
     points matched the filters, OK otherwise. Appends count perfdata.
     """
     try:
         linux_only(' as it reads /proc/mounts for more reliable information than the mount command provides' + \
                    ', see --help description for more details')
     except LinuxOnlyException as _:
         raise UnknownError('LinuxOnlyException: {}'.format(_))
     mount_lines = self.get_mounts()
     (num_read_only, num_checked, read_only) = self.parse_mounts(mount_lines)
     self.msg = '{} read only mount point{} out of {} mount point{} checked'\
                .format(num_read_only, plural(num_read_only), num_checked, plural(num_checked))
     # escalating order: ok -> warning (nothing matched) -> critical (read-only found)
     if num_read_only == 0:
         self.ok()
     if num_checked == 0:
         self.warning()
         self.msg += ' (no matching mount points?)'
     if num_read_only > 0:
         self.critical()
         self.msg += '!'
         if self.verbose:
             from pprint import pprint
             pprint(read_only)
             # fixed: .items() instead of Python-2-only .iteritems() so this
             # branch doesn't AttributeError under Python 3
             if self.verbose > 1:
                 _ = ['{}({})'.format(mount_point, _type) for mount_point, _type in read_only.items()]
             else:
                 _ = [mount_point for mount_point, _type in read_only.items()]
             self.msg += ' [{}]'.format(', '.join(_))
     self.msg += ' | read_only_mount_points={} mount_points_checked={}'.format(num_read_only, num_checked)
コード例 #11
0
    def run(self):
        """Connect to Jenkins, count offline build nodes and check thresholds.

        Raises CriticalError on any Jenkins API failure; appends node counts
        and query time as perfdata.
        """
        server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
        try:
            log.debug('setting up Jenkins connection to %s', server_url)
            start_time = time.time()
            server = jenkins.Jenkins(server_url, username=self.user, password=self.password, timeout=self.timeout / 3)
            if log.isEnabledFor(logging.DEBUG):
                log.debug('getting user')
                user = server.get_whoami()
                log.debug('connected as user %s', jsonpp(user))
            log.debug('getting Jenkins nodes')
            nodes = server.get_nodes()
            log.debug('nodes: %s', nodes)
            node_count = len(nodes)
            log.debug('node count: %s', node_count)
            # count nodes the master reports as offline
            offline_nodes = sum(1 for node in nodes if node['offline'])
            self.msg += '{0} offline node{1}'.format(offline_nodes, plural(offline_nodes))
            self.check_thresholds(offline_nodes)
            self.msg += ' out of {0} node{1}'.format(node_count, plural(node_count))
        except jenkins.JenkinsException as _:
            raise CriticalError(_)

        query_time = time.time() - start_time
        self.msg += ' | offline_nodes={0:d}'.format(offline_nodes)
        self.msg += self.get_perf_thresholds()
        self.msg += ' node_count={0:d}'.format(node_count)
        self.msg += ' query_time={0:.4f}s'.format(query_time)
コード例 #12
0
 def check_ingestion(self, num, filter_opts=None, max_age=None, max_runtime=None):
     """Check ingestion history returned by the API against status/age/runtime limits.

     :param num: number of most recent ingestions to fetch
     :param filter_opts: optional dict of API filters (e.g. inventoryId, fileName)
     :param max_age: max allowed age of the last ingest (or None)
     :param max_runtime: max allowed runtime for incomplete ingests (or None)
     """
     log.info('checking ingestion history')
     json_dict = self.get_ingestions(num, filter_opts)
     info = ''
     # fixed: guard against filter_opts=None (the default) - sorted(None) raised TypeError
     if self.verbose and filter_opts:
         for key in sorted(filter_opts):
             info += " {0}='{1}'".format(key, filter_opts[key])
     try:
         result = json_dict['result']
         if not result:
             qquit('CRITICAL', "no results found for ingestion{0}"\
                   .format('{0}. {1}'.format(info, self.extract_response_message(json_dict)) + \
                   'Perhaps you specified incorrect filters? Use --list to see existing ingestions'))
         num_results = len(result)
         log.info('%s ingestion history results returned', num_results)
         self.check_statuses(result)
         if num:
             self.msg += ' out of last {0} ingest{1}'.format(num_results, plural(num_results))
         if self.history_mins:
             self.msg += ' within last {0} min{1}'.format(str(self.history_mins).rstrip('0').rstrip('.'),
                                                          plural(self.history_mins))
         longest_incomplete_timedelta = self.check_longest_incomplete_ingest(result, max_runtime)
         # newest is first
         # effectiveDate is null in testing (docs says it's a placeholder for future use)
         # using ingestionTimeFormatted instead, could also use ingestionTime which is timestamp in millis
         ingestion_date = result[0]['ingestionTimeFormatted']
         age_timedelta = self.check_last_ingest_age(ingestion_date=ingestion_date, max_age=max_age)
         params_reference = [('inventoryId', 'id'), ('fileName', 'source'), ('destinationPath', 'dest')]
         # fixed: same None guard before testing 'param in filter_opts'
         if self.verbose and filter_opts and \
            [param for (param, _) in params_reference if param in filter_opts]:
             self.msg += ' for'
             for (param, name) in params_reference:
                 if param in filter_opts:
                     self.msg += " {name}='{value}'".format(name=name, value=filter_opts[param])
         self.msg += ' |'
         # NOTE(review): thresholds below assume max_age is in hours (* 3600) while
         # other checks in this file treat such limits as minutes - confirm units
         self.msg += ' last_ingest_age={0}s;{1}'.format(age_timedelta.seconds,
                                                        max_age * 3600 if max_age else '')
         self.msg += ' longest_incomplete_ingest_age={0}s;{1}'.format(longest_incomplete_timedelta.seconds \
                                                                      if longest_incomplete_timedelta else 0,
                                                                      max_age * 3600 if max_age else '')
         self.msg += ' auth_time={auth_time}s query_time={query_time}s'.format(auth_time=self.auth_time,
                                                                               query_time=self.query_time)
     except KeyError as _:
         qquit('UNKNOWN', 'error parsing workflow execution history: {0}'.format(_))
コード例 #13
0
    def check_table_regions(self):
        """Fetch region info for self.table over the HBase Thrift API and check the
        total region count against thresholds, warning on any unassigned regions.

        Builds self.msg plus num_regions / num_unassigned_regions perfdata and
        closes the Thrift connection when done.
        """
        log.info('checking regions for table \'%s\'', self.table)
        regions = None
        try:
            table = self.conn.table(self.table)
            regions = table.regions()
        except HBaseIOError as _:
            # distinguish a missing table from other HBase I/O failures by
            # matching on the exception text
            #if 'org.apache.hadoop.hbase.TableNotFoundException' in _.message:
            # NOTE(review): '_.message' is a Python 2 idiom - raises AttributeError
            # on Python 3; confirm target interpreter version
            if 'TableNotFoundException' in _.message:
                qquit('CRITICAL',
                      'table \'{0}\' does not exist'.format(self.table))
            else:
                qquit('CRITICAL', _)
        except (socket.error, socket.timeout, ThriftException) as _:
            qquit('CRITICAL', _)

        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s', jsonpp(regions))
        if not regions:
            qquit('CRITICAL',
                  'failed to get regions for table \'{0}\''.format(self.table))
        if not isList(regions):
            qquit('UNKNOWN',
                  'region info returned is not a list! ' + support_msg_api())
        num_regions = len(regions)
        log.info('num regions: %s', num_regions)

        self.msg = 'HBase table \'{0}\' has {1} region{2}'.format(
            self.table, num_regions, plural(num_regions))
        self.check_thresholds(num_regions)

        # a region with an empty 'server_name' has no region server assigned
        num_unassigned_regions = 0
        for region in regions:
            try:
                if not region['server_name']:
                    #log.debug('region \'%s\' is not assigned to any server', region['name'])
                    num_unassigned_regions += 1
            except KeyError as _:
                qquit(
                    'UNKNOWN', 'failed to find server assigned to region. ' +
                    support_msg_api())
        log.info('num unassigned regions: %s', num_unassigned_regions)
        self.msg += ', {0} unassigned region{1}'.format(
            num_unassigned_regions, plural(num_unassigned_regions))
        if num_unassigned_regions > 0:
            self.warning()
            self.msg += '!'

        self.msg += ' |'
        self.msg += ' num_regions={0}'.format(
            num_regions) + self.get_perf_thresholds(boundary='lower')
        # fixed warn/crit thresholds: any unassigned region (>=1) is a problem
        self.msg += ' num_unassigned_regions={0};1;0'.format(
            num_unassigned_regions)
        log.info('finished, closing connection')
        self.conn.close()
コード例 #14
0
 def check_times(self, start_date, end_date):
     """Check run duration and time since start against self.max_runtime /
     self.min_runtime / self.max_age (all in minutes).

     Dates are '%m/%d/%Y %H:%M:%S' strings and may be blank / 'null' / 'None',
     in which case the duration checks are skipped. Perfdata is only emitted
     when both deltas are available, to keep the field count stable for
     PNP4Nagios graphing.
     """
     start_date = str(start_date).strip()
     end_date = str(end_date).strip()
     invalid_dates = ('', 'null', 'None', None)
     age_timedelta = None
     runtime_delta = None
     if start_date not in invalid_dates and \
        end_date not in invalid_dates:
         try:
             start_datetime = datetime.strptime(start_date, '%m/%d/%Y %H:%M:%S')
             end_datetime = datetime.strptime(end_date, '%m/%d/%Y %H:%M:%S')
         except ValueError as _:
             qquit('UNKNOWN', 'error parsing date time format: {0}'.format(_))
         runtime_delta = end_datetime - start_datetime
         runtime_delta_secs = self.timedelta_seconds(runtime_delta)
         self.msg += ' in {0}'.format(sec2human(runtime_delta_secs))
         if self.max_runtime is not None and (runtime_delta_secs / 60.0) > self.max_runtime:
             self.warning()
             self.msg += ' (greater than {0} min{1}!)'.format(str(self.max_runtime).rstrip('0').rstrip('.'),
                                                              plural(self.max_runtime))
         if self.min_runtime is not None and (runtime_delta_secs / 60.0) < self.min_runtime:
             self.warning()
             self.msg += ' (less than {0} min{1}!)'.format(str(self.min_runtime).rstrip('0').rstrip('.'),
                                                           plural(self.min_runtime))
         age_timedelta = datetime.now() - start_datetime
         age_timedelta_secs = self.timedelta_seconds(age_timedelta)
     if self.verbose:
         self.msg += ", start date = '{startdate}', end date = '{enddate}'".\
                     format(startdate=start_date, enddate=end_date)
         if age_timedelta is not None:
             self.msg += ', started {0} ago'.format(sec2human(age_timedelta_secs))
     if self.max_age is not None and age_timedelta is not None \
        and age_timedelta_secs > (self.max_age * 60.0):
         self.warning()
         self.msg += ' (last run started more than {0} min{1} ago!)'.format(str(self.max_age).rstrip('0').rstrip('.'),
                                                                            plural(self.max_age))
     # Do not output variable number of fields at all if agedelta is not available as that breaks PNP4Nagios graphing
     # fixed: test 'runtime_delta is not None' rather than truthiness - a legitimate
     # zero-duration run (falsy timedelta) used to silently drop all perfdata
     if age_timedelta is not None and runtime_delta is not None:
         self.msg += ' |'
         self.msg += ' runtime={0}s;{1}'.format(runtime_delta_secs, self.max_runtime * 60 \
                                                                         if self.max_runtime else '')
         self.msg += ' age={0}s;{1}'.format(age_timedelta_secs, self.max_age * 60 if self.max_age else '')
         self.msg += ' auth_time={auth_time}s query_time={query_time}s'.format(auth_time=self.auth_time,
                                                                               query_time=self.query_time)
コード例 #15
0
 def parse_json(self, json_data):
     """Parse the NameNode JMX LiveNodes bean and check the HDFS space-used
     imbalance percentage across datanodes against thresholds.

     Imbalance = (max used space - min used space) / max used space * 100.
     In verbose mode lists nodes whose used-space percentage of the maximum
     exceeds the warning threshold.
     """
     log.info('parsing response')
     try:
         # LiveNodes is itself a JSON string embedded in the bean
         live_nodes = json_data['beans'][0]['LiveNodes']
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise UnknownError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                .format(self.host, self.port))
         min_space = None
         max_space = 0
         for datanode in live_node_data:
             used_space = live_node_data[datanode]['usedSpace']
             if not isInt(used_space):
                 raise UnknownError('usedSpace is not an integer! {0}'.format(support_msg_api()))
             used_space = int(used_space)
             log.info("datanode '%s' used space = %s", datanode, used_space)
             if min_space is None or used_space < min_space:
                 min_space = used_space
             if used_space > max_space:
                 max_space = used_space
         divisor = max_space
         if divisor < 1:
             # fixed message: it is max used space being tested here, not min
             log.info('max used space < 1, resetting divisor to 1 (% will likely be very high)')
             divisor = 1
         assert max_space >= min_space
         # fixed: float() guards against Python 2 integer division truncating the ratio to 0
         largest_imbalance_pc = float('{0:.2f}'.format(((max_space - min_space) / float(divisor)) * 100))
         assert largest_imbalance_pc >= 0
         self.ok()
         self.msg = '{0}% HDFS imbalance on space used'.format(largest_imbalance_pc)
         self.check_thresholds(largest_imbalance_pc)
         self.msg += ' across {0:d} datanode{1}'.format(num_datanodes, plural(num_datanodes))
         if self.verbose:
             self.msg += ', min used space = {0}, max used space = {1}'.format(min_space, max_space)
         if self.verbose and (self.is_warning() or self.is_critical()):
             self.msg += ' [imbalanced nodes: '
             for datanode in live_node_data:
                 used_space = live_node_data[datanode]['usedSpace']
                 used_pc = used_space / float(max_space) * 100
                 if used_pc > self.thresholds['warning']['upper']:
                     # fixed: '{1:.2f%}' is an invalid format spec and raised ValueError
                     # whenever this branch ran; also show the node's used-space
                     # percentage rather than the raw byte count
                     self.msg += '{0}({1:.2f}%),'.format(datanode, used_pc)
             self.msg = self.msg.rstrip(',') + ']'
         self.msg += " | 'HDFS imbalance on space used %'={0}".format(largest_imbalance_pc)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_used_space={0}".format(min_space)
         self.msg += " max_used_space={0}".format(max_space)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
コード例 #16
0
 def run(self):
     """Page through DockerHub build history until a completed build is found.

     Returns True once process_results() accepts a page; raises UnknownError
     if no completed build shows up within self.max_pages pages.
     """
     start_time = time.time()
     for page in range(1, self.max_pages + 1):
         url = 'https://registry.hub.docker.com/v2/repositories/{repo}/buildhistory?page={page}'\
               .format(repo=self.repo, page=page)
         req = self.request.get(url)
         if log.isEnabledFor(logging.DEBUG):
             log.debug(jsonpp(req.content))
         json_data = json.loads(req.content)
         log.debug('%s out of %s results returned for page %s',
                   len(json_data['results']), json_data['count'], page)
         if not self.process_results(json_data):
             continue
         # not quite as accurate as before as it now includes processing time but close enough
         query_time = time.time() - start_time
         if '|' not in self.msg:
             self.msg += ' |'
         self.msg += ' query_time={0:.2f}s'.format(query_time)
         return True
     extra_info = ''
     if self.verbose:
         extra_info = ' ({0} page{1} of API output)'\
                      .format(self.max_pages, plural(self.max_pages))
     raise UnknownError('no completed builds found in last {0} builds{1}'.format(self.max_pages * 10, extra_info))
コード例 #17
0
 def print_stats(self, host):
     """Print per-region read/write/total request-rate stats for one host as TSV.

     Rates are differentials, so the first iteration only announces when stats
     will become available and returns without printing data rows.
     """
     stats = self.stats
     show = self.show
     tstamp = time.strftime('%F %T')
     if not stats:
         print(
             "No table regions found for table '{}'. Did you specify the correct table name?"
             .format(self.table))
         sys.exit(1)
     if self.first_iteration:
         log.info(
             'first iteration or recent new region, skipping iteration until we have a differential'
         )
         print('{}\t{} rate stats will be available in next iteration in {} sec{}'\
               .format(tstamp, host, self.interval, plural(self.interval)))
         self.first_iteration = 0
         return
     for table in sorted(stats[host]):
         for region in sorted(stats[host][table]):
             table_region = region
             # NOTE(review): len(stats) counts *hosts*; prefixing the table name when
             # multiple tables are present would be len(stats[host]) > 1 - confirm intent
             if len(stats) > 1:
                 table_region = '{}:{}'.format(table, region)
             # maintain explicit order for humans
             # rather than iterate keys of region which will some out in the wrong order
             for metric in ('read', 'write', 'total'):
                 if (not show) or metric in show:
                     print('{:20s}\t{:20s}\t{:40s}\t{:10s}\t{:8.0f}'\
                           .format(tstamp, host, table_region, metric, stats[host][table][region][metric]))
     print()
 def process_bean(self, host, bean, uptime):
     """Extract per-region RequestCount metrics from one JMX bean and print rates.

     In since-uptime mode prints count/uptime averages; otherwise prints the
     differential against the previous sample cached in self.stats, then
     stores the current counts for the next iteration.
     """
     region_regex = re.compile('^Namespace_{namespace}_table_{table}_region_(.+)_metric_(.+)RequestCount'\
                               .format(namespace=self.namespace, table=self.table))
     # announce 'stats next iteration' at most once per bean, not once per metric
     first_iteration = 1
     for key in sorted(bean):
         match = region_regex.match(key)
         if match:
             region = match.group(1)
             metric = match.group(2)
             #log.debug('match region %s %s request count', region, metric)
             if self.since_uptime:
                 print('{:20s}\t{:20s}\t\t{:10s}\t{:8.0f}'.format(
                     host, region, metric, bean[key] / uptime))
             else:
                 tstamp = time.strftime('%F %T')
                 if region not in self.stats:
                     self.stats[region] = {}
                 if metric in self.stats[region]:
                     # differential vs the previous sample = rate over the interval
                     print('{}\t{:20s}\t{:20s}\t\t{:10s}\t{:8.0f}'\
                           .format(tstamp, host, region, metric, bean[key] - self.stats[region][metric]))
                 else:
                     if first_iteration:
                         print('{}\trate stats will be available in next iteration in {} sec{}'\
                               .format(tstamp, self.interval, plural(self.interval)))
                     first_iteration = 0
                 self.stats[region][metric] = bean[key]
     print()
コード例 #19
0
 def msg_queue_stats(self, queue_stats):
     """Append matching-queue counts and perfdata to self.msg.

     queue_stats maps each of 'allowed' / 'non-allowed' / 'disallowed' to a
     dict of per-queue counts plus a synthetic 'total' key.
     Returns queue_stats unchanged for chaining.
     """
     # subtract 3 to discount the synthetic 'total' key in each of the three dicts
     matching_queues = len(queue_stats['allowed']) + \
                       len(queue_stats['non-allowed']) + \
                       len(queue_stats['disallowed']) - 3  # account for 'total' in each dict
     self.msg += "{0} matching queue{1}".format(matching_queues, plural(matching_queues))
     for _type in ('disallowed', 'non-allowed', 'allowed'):
         self.msg += ', {0} = {1}'.format(_type, queue_stats[_type]['total'])
     # dict.keys() views cannot be concatenated with '+' on Python 3 (TypeError),
     # so build the union of queue names from lists; computed once instead of twice
     all_queues = sorted(set(list(queue_stats['disallowed']) +
                             list(queue_stats['non-allowed']) +
                             list(queue_stats['allowed'])))
     if self.verbose and matching_queues > 1:
         for queue in all_queues:
             if queue == 'total':
                 continue
             for _type in ('disallowed', 'non-allowed', 'allowed'):
                 self.msg += ', {0} {1} = {2}'.format(queue, _type, queue_stats[_type].get(queue, 0))
     self.msg += ' |'
     for _type in ('disallowed', 'non-allowed', 'allowed'):
         self.msg += " '{0}'={1}".format(_type, queue_stats[_type]['total'])
     if self.verbose and matching_queues > 1:
         for queue in all_queues:
             if queue == 'total':
                 continue
             for _type in ('disallowed', 'non-allowed', 'allowed'):
                 self.msg += " '{0} {1}'={2}".format(queue, _type, queue_stats[_type].get(queue, 0))
     return queue_stats
コード例 #20
0
 def timeout_handler(self, signum, frame):  # pylint: disable=unused-argument
     """Self-timeout signal handler: kill child processes, then exit UNKNOWN."""
     # reap any spawned subprocesses so they don't outlive the plugin
     for child_proc in psutil.Process().children():
         child_proc.kill()
     time.sleep(1)
     message = 'self timed out after %d second%s' % (self.timeout, plural(self.timeout))
     qquit('UNKNOWN', message)
 def process_stats(self, stats):
     """Compute the reqs/sec imbalance across RegionServers.

     stats maps regionserver name -> requests/sec. Builds self.msg including
     min/max details (when verbose or not OK) and perfdata, and applies the
     warning/critical thresholds to the imbalance percentage.
     """
     min_reqs = None
     max_reqs = None
     min_server = None
     max_server = None
     # single pass tracking both extremes; strict comparisons keep the
     # first-seen server on ties, matching dict iteration order
     for server in stats:
         reqs = stats[server]
         if min_reqs is None or reqs < min_reqs:
             min_reqs = reqs
             min_server = server
         if max_reqs is None or reqs > max_reqs:
             max_reqs = reqs
             max_server = server
     # simple algo - let me know if you think can be a better calculation
     imbalance = (max_reqs - min_reqs) / max(max_reqs, 1) * 100
     num_regionservers = len(stats)
     self.msg = 'HBase RegionServers reqs/sec imbalance = {:.0f}% across {} RegionServer{}'\
                .format(imbalance, num_regionservers, plural(num_regionservers))
     self.check_thresholds(imbalance)
     if self.verbose or not self.is_ok():
         self.msg += ' [min reqs/sec={} on {} / max reqs/sec={} on {}]'\
                     .format(min_reqs, min_server, max_reqs, max_server)
     self.msg += ' | reqs_per_sec_balance={:.2f}%{} lowest_requests_per_sec={} highest_requests_per_sec={}'\
                 .format(imbalance, self.get_perf_thresholds(), min_reqs, max_reqs)
コード例 #22
0
 def process_stats(self, stats):
     """Compute the reqs/sec imbalance across RegionServers.

     stats maps regionserver name -> requests/sec. Tracks the servers with the
     lowest and highest rates, derives an imbalance percentage, applies
     thresholds and assembles self.msg with perfdata.
     """
     lowest_requests = None
     highest_requests = None
     lowest_regionserver = None
     highest_regionserver = None
     for regionserver in stats:
         # None checks seed both extremes from the first server seen
         if lowest_requests is None:
             lowest_requests = stats[regionserver]
             lowest_regionserver = regionserver
         if highest_requests is None:
             highest_requests = stats[regionserver]
             highest_regionserver = regionserver
         # strict comparisons keep the first-seen server on ties
         if stats[regionserver] > highest_requests:
             highest_requests = stats[regionserver]
             highest_regionserver = regionserver
         if stats[regionserver] < lowest_requests:
             lowest_requests = stats[regionserver]
             lowest_regionserver = regionserver
     # simple algo - let me know if you think can be a better calculation
     # max(..., 1) guards against division by zero when the busiest server shows 0 reqs/sec
     imbalance = (highest_requests - lowest_requests) / max(
         highest_requests, 1) * 100
     num_regionservers = len(stats)
     self.msg = 'HBase RegionServers reqs/sec imbalance = {:.0f}% across {} RegionServer{}'\
                .format(imbalance, num_regionservers, plural(num_regionservers))
     self.check_thresholds(imbalance)
     if self.verbose or not self.is_ok():
         self.msg += ' [min reqs/sec={} on {} / max reqs/sec={} on {}]'\
                     .format(lowest_requests, lowest_regionserver, highest_requests, highest_regionserver)
     self.msg += ' | reqs_per_sec_balance={:.2f}%{} lowest_requests_per_sec={} highest_requests_per_sec={}'\
                 .format(imbalance, self.get_perf_thresholds(), lowest_requests, highest_requests)
コード例 #23
0
 def msg_queue_stats(self, queue_stats):
     """Append matching-queue counts and perfdata to self.msg.

     queue_stats maps each of 'allowed' / 'non-allowed' / 'disallowed' to a
     dict of per-queue counts plus a synthetic 'total' key.
     Returns queue_stats unchanged for chaining.
     """
     # subtract 3 to discount the synthetic 'total' key in each of the three dicts
     matching_queues = len(queue_stats['allowed']) + \
                       len(queue_stats['non-allowed']) + \
                       len(queue_stats['disallowed']) - 3  # account for 'total' in each dict
     self.msg += "{0} matching queue{1}".format(matching_queues, plural(matching_queues))
     for _type in ('disallowed', 'non-allowed', 'allowed'):
         self.msg += ', {0} = {1}'.format(_type, queue_stats[_type]['total'])
     # dict.keys() views cannot be concatenated with '+' on Python 3 (TypeError),
     # so build the union of queue names from lists; computed once instead of twice
     all_queues = sorted(set(list(queue_stats['disallowed']) +
                             list(queue_stats['non-allowed']) +
                             list(queue_stats['allowed'])))
     if self.verbose and matching_queues > 1:
         for queue in all_queues:
             if queue == 'total':
                 continue
             for _type in ('disallowed', 'non-allowed', 'allowed'):
                 self.msg += ', {0} {1} = {2}'.format(queue, _type, queue_stats[_type].get(queue, 0))
     self.msg += ' |'
     for _type in ('disallowed', 'non-allowed', 'allowed'):
         self.msg += " '{0}'={1}".format(_type, queue_stats[_type]['total'])
     if self.verbose and matching_queues > 1:
         for queue in all_queues:
             if queue == 'total':
                 continue
             for _type in ('disallowed', 'non-allowed', 'allowed'):
                 self.msg += " '{0} {1}'={2}".format(queue, _type, queue_stats[_type].get(queue, 0))
     return queue_stats
コード例 #24
0
 def print_stats(self, host):
     """Print one differential rate row per request type for the given host.

     Exits with code 1 if no regionserver stats were collected, and skips the
     first iteration since rates require a previous sample to diff against.
     """
     stats = self.stats
     tstamp = time.strftime('%F %T')
     if not stats:
         print(
             "No regionserver stats found. Did you specify correct regionserver addresses and --port?"
         )
         sys.exit(1)
     if self.first_iteration:
         log.info(
             'first iteration, skipping iteration until we have a differential'
         )
         print('{}\t{} rate stats will be available in next iteration in {} sec{}'\
               .format(tstamp, host, self.interval, plural(self.interval)))
         self.first_iteration = 0
         return
     for metric in self.request_types:
         # honour the optional --request-type filter
         if self.request_type and metric not in self.request_type:
             continue
         try:
             val = '{:8.0f}'.format(stats[host][metric])
         # might happen if server is down for maintenance - in which case N/A and retry later rather than crash
         except KeyError:
             val = 'N/A'
         print('{:20s}\t{:20s}\t{:10s}\t{}'\
               .format(tstamp, host, metric, val))
     print()
コード例 #25
0
 def run(self):
     """Check a local Git checkout for changed, untracked or staged files.

     Goes CRITICAL if any changed/untracked files exist, or if the repo is
     otherwise dirty (uncommitted staged changes). Emits perfdata counts with
     ';0;0' warn/crit ranges so any non-zero value alarms in graphing tools.
     """
     self.no_args()
     directory = self.get_opt('directory')
     validate_directory(directory)
     directory = os.path.abspath(directory)
     try:
         repo = git.Repo(directory)
     except InvalidGitRepositoryError as _:
         raise CriticalError(
             "directory '{}' does not contain a valid Git repository!".
             format(directory))
     try:
         untracked_files = repo.untracked_files
         num_untracked_files = len(untracked_files)
         # diff(None) = working tree vs index (unstaged modifications)
         changed_files = [item.a_path for item in repo.index.diff(None)]
         # don't double count files as both changed and untracked
         changed_files = [
             filename for filename in changed_files
             if filename not in untracked_files
         ]
         num_changed_files = len(changed_files)
     except InvalidGitRepositoryError as _:
         raise CriticalError(_)
     except TypeError as _:
         raise CriticalError(_)
     self.msg = '{} changed file{}'.format(num_changed_files,
                                           plural(num_changed_files))
     self.msg += ', {} untracked file{}'.format(num_untracked_files,
                                                plural(num_untracked_files))
     self.msg += " in Git checkout at directory '{}'".format(directory)
     uncommitted_staged_changes = 0
     if changed_files or untracked_files:
         self.critical()
         if self.verbose:
             if changed_files:
                 self.msg += ' (changed files: {})'.format(
                     ', '.join(changed_files))
             if untracked_files:
                 self.msg += ' (untracked files: {})'.format(
                     ', '.join(untracked_files))
     elif repo.is_dirty():
         # nothing changed/untracked in the working tree but the index differs
         # from HEAD => staged-but-uncommitted changes
         self.msg += ', uncommitted staged changes detected!'
         self.critical()
         uncommitted_staged_changes = 1
     self.msg += ' | changed_files={};0;0 untracked_files={};0;0'.format(
         num_changed_files, num_untracked_files)
     self.msg += ' uncommitted_staged_changes={};0;0'.format(
         uncommitted_staged_changes)
コード例 #26
0
 def check_times(self, start_date, end_date):
     """Validate a job's runtime and age against min/max runtime and max age.

     Parses start/end dates in '%m/%d/%Y %H:%M:%S' format, warns when the
     runtime falls outside --min/--max-runtime (minutes) or when the start is
     older than --max-age (minutes), and appends runtime/age perfdata.
     Dates that are empty / 'null' / 'None' are treated as unavailable.
     """
     start_date = str(start_date).strip()
     end_date = str(end_date).strip()
     invalid_dates = ('', 'null', 'None', None)
     age_timedelta = None
     runtime_delta = None
     if start_date not in invalid_dates and \
        end_date not in invalid_dates:
         try:
             start_datetime = datetime.strptime(start_date, '%m/%d/%Y %H:%M:%S')
             end_datetime = datetime.strptime(end_date, '%m/%d/%Y %H:%M:%S')
         except ValueError as _:
             qquit('UNKNOWN', 'error parsing date time format: {0}'.format(_))
         runtime_delta = end_datetime - start_datetime
         runtime_delta_secs = self.timedelta_seconds(runtime_delta)
         self.msg += ' in {0}'.format(sec2human(runtime_delta_secs))
         # rstrip('0').rstrip('.') prints thresholds like 5.0 as just '5'
         if self.max_runtime is not None and (runtime_delta_secs / 60.0) > self.max_runtime:
             self.warning()
             self.msg += ' (greater than {0} min{1}!)'.format(str(self.max_runtime).rstrip('0').rstrip('.'),
                                                              plural(self.max_runtime))
         if self.min_runtime is not None and (runtime_delta_secs / 60.0) < self.min_runtime:
             self.warning()
             self.msg += ' (less than {0} min{1}!)'.format(str(self.min_runtime).rstrip('0').rstrip('.'),
                                                           plural(self.min_runtime))
         # NOTE(review): datetime.now() is naive local time - assumes the job
         # timestamps are in the same timezone; confirm against the data source
         age_timedelta = datetime.now() - start_datetime
         age_timedelta_secs = self.timedelta_seconds(age_timedelta)
     if self.verbose:
         self.msg += ", start date = '{startdate}', end date = '{enddate}'".\
                     format(startdate=start_date, enddate=end_date)
         if age_timedelta is not None:
             self.msg += ', started {0} ago'.format(sec2human(age_timedelta_secs))
     if self.max_age is not None and age_timedelta is not None \
        and age_timedelta_secs > (self.max_age * 60.0):
         self.warning()
         self.msg += ' (last run started more than {0} min{1} ago!)'.format(str(self.max_age)
                                                                            .rstrip('0')
                                                                            .rstrip('.'),
                                                                            plural(self.max_age))
     # Do not output variable number of fields at all if agedelta is not available as that breaks PNP4Nagios graphing
     if age_timedelta is not None and runtime_delta:
         self.msg += ' |'
         self.msg += ' runtime={0}s;{1}'.format(runtime_delta_secs, self.max_runtime * 60 \
                                                                         if self.max_runtime else '')
         self.msg += ' age={0}s;{1}'.format(age_timedelta_secs, self.max_age * 60 if self.max_age else '')
         self.msg += ' auth_time={auth_time}s query_time={query_time}s'.format(auth_time=self.auth_time,
                                                                               query_time=self.query_time)
コード例 #27
0
 def check_ingestion(self,
                     num,
                     filter_opts=None,
                     max_age=None,
                     max_runtime=None):
     """Check the ingestion history returned by the API.

     :param num: number of most recent ingests to request (0/None = unlimited)
     :param filter_opts: optional dict of filter key/values applied to the query
     :param max_age: warn if the last ingest started more than this many hours ago
     :param max_runtime: warn if an incomplete ingest has run longer than this
     Exits via qquit on missing results or unparseable response.
     """
     log.info('checking ingestion history')
     json_dict = self.get_ingestions(num, filter_opts)
     info = ''
     # filter_opts defaults to None - guard it, otherwise sorted(None)
     # raises TypeError whenever --verbose is used without filters
     if self.verbose and filter_opts:
         for key in sorted(filter_opts):
             info += " {0}='{1}'".format(key, filter_opts[key])
     try:
         results = json_dict['result']
         if not results:
             qquit('CRITICAL', "no results found for ingestion{0}"\
                   .format('{0}. {1}'.format(info, self.extract_response_message(json_dict)) + \
                   'Perhaps you specified incorrect filters? Use --list to see existing ingestions'))
         num_results = len(results)
         log.info('%s ingestion history results returned', num_results)
         self.check_statuses(results)
         if num:
             self.msg += ' out of last {0} ingest{1}'.format(
                 num_results, plural(num_results))
         if self.history_mins:
             self.msg += ' within last {0} ({1} min{2})'.format(
                 sec2human(self.history_mins * 60),
                 str(self.history_mins).rstrip('0').rstrip('.'),
                 plural(self.history_mins))
         longest_incomplete_timedelta = self.check_longest_incomplete_ingest(
             results, max_runtime)
         age_timedelta_secs = self.check_last_ingest_age(results,
                                                         max_age=max_age)
         self.msg_filter_details(filter_opts=filter_opts)
         self.msg += ' |'
         self.msg += ' last_ingest_age={0}s;{1}'.format(
             age_timedelta_secs, max_age * 3600 if max_age else '')
         self.msg += ' longest_incomplete_ingest_age={0}s;{1}'\
                     .format(self.timedelta_seconds(longest_incomplete_timedelta)
                             if longest_incomplete_timedelta else 0,
                             max_age * 3600 if max_age else '')
         self.msg += ' auth_time={auth_time}s query_time={query_time}s'.format(
             auth_time=self.auth_time, query_time=self.query_time)
     except KeyError as _:
         qquit('UNKNOWN',
               'error parsing workflow execution history: {0}'.format(_))
コード例 #28
0
ファイル: cli.py プロジェクト: smutel/pylib
 def timeout_handler(self, signum, frame): # pylint: disable=unused-argument
     """Alarm-signal handler: abort with UNKNOWN when the self-timeout fires.

     Printing here and then exiting would cause the message to appear twice,
     since the exit exception gets caught and re-printed by the NagiosPlugin
     framework - so delegate to qquit(), which prints once and exits.
     """
     message = 'self timed out after %d second%s' % (self.timeout, plural(self.timeout))
     qquit('UNKNOWN', message)
コード例 #29
0
 def parse_json(self, json_data):
     """Count failed Presto worker nodes from the API list and apply thresholds."""
     if not isList(json_data):
         raise UnknownError(
             'non-list returned by Presto for nodes failed. {0}'.format(support_msg_api()))
     # the API returns one list entry per failed worker node
     num_failed_nodes = len(json_data)
     self.msg = 'Presto SQL - {0} worker node{1} failed'.format(num_failed_nodes,
                                                                plural(num_failed_nodes))
     self.check_thresholds(num_failed_nodes)
コード例 #30
0
ファイル: cli.py プロジェクト: HariSekhon/pylib
 def timeout_handler(self, signum, frame): # pylint: disable=unused-argument
     # Alarm-signal handler: abort with UNKNOWN when the self-timeout fires.
     # problem with this is that it'll print and then the exit exception will be caught and quit() printed again
     # raising a custom TimeoutException will need to be handled in main, but that would also likely print and be
     # re-caught and re-printed by NagiosPlugin
     #print('self timed out after %d second%s' % (self.timeout, plural(self.timeout)))
     #sys.exit(ERRORS['UNKNOWN'])
     # if doing die the same thing same will happen since die is a custom func which prints and then calls exit,
     # only exit would be caught
     # hence qquit(), which prints the message exactly once and exits
     qquit('UNKNOWN', 'self timed out after %d second%s' % (self.timeout, plural(self.timeout)))
 def parse_json(self, json_data):
     """Check running Spark Shell apps against the elapsed-time SLA.

     Extracts the app list, counts shells breaching the SLA and the longest
     running shell, then assembles self.msg plus perfdata.
     """
     app_list = self.get_app_list(json_data)
     (num_shells_breaching_sla, num_matching_apps, max_elapsed, max_threshold_msg) = \
                                                                             self.check_app_elapsed_times(app_list)
     # note: self.msg is appended to, assuming the status prefix was set earlier
     self.msg += '{0}, checked {1} Spark Shell{2} out of {3} running apps'\
                .format(num_shells_breaching_sla, num_matching_apps, plural(num_matching_apps), len(app_list)) + \
                ', longest running Spark Shell = {0} secs{1}'\
                .format(max_elapsed, max_threshold_msg)
     self.msg += ' | num_spark_shells_breaching_SLA={0} max_elapsed_spark_shell_time={1}{2}'\
                 .format(num_shells_breaching_sla, max_elapsed, self.get_perf_thresholds())
コード例 #32
0
 def parse_json(self, json_data):
     """Calculate HDFS block imbalance % across datanodes from NameNode JMX.

     Parses the 'LiveNodes' bean (itself a JSON-encoded string), finds the
     min/max block counts across live datanodes, computes
     (max - min) / min * 100 as the imbalance percentage, applies thresholds
     and assembles self.msg with perfdata.
     """
     log.info('parsing response')
     try:
         # LiveNodes is a JSON string embedded inside the JMX JSON response
         live_nodes = json_data['beans'][0]['LiveNodes']
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                 .format(self.host, self.port))
         max_blocks = 0
         min_blocks = None
         for datanode in live_node_data:
             blocks = live_node_data[datanode]['numBlocks']
             if not isInt(blocks):
                 raise UnknownError(
                     'numBlocks {} is not an integer! {}'.format(
                         blocks, support_msg_api()))
             blocks = int(blocks)
             log.info("datanode '%s' has %s blocks", datanode, blocks)
             if blocks > max_blocks:
                 max_blocks = blocks
             if min_blocks is None or blocks < min_blocks:
                 min_blocks = blocks
         log.info("max blocks on a single datanode = %s", max_blocks)
         log.info("min blocks on a single datanode = %s", min_blocks)
         if min_blocks is None:
             raise UnknownError('min_blocks is None')
         divisor = min_blocks
         if min_blocks < 1:
             # avoid division by zero when the emptiest datanode has no blocks
             log.info(
                 "min blocks < 1, resetting divisor to 1 (% will be very high)"
             )
             divisor = 1
         # round to 2 decimal places via format-then-parse
         block_imbalance = float("{0:.2f}".format(
             (max_blocks - min_blocks) / divisor * 100))
         self.msg = '{0}% block imbalance across {1} datanode{2}'\
                    .format(block_imbalance, num_datanodes, plural(num_datanodes))
         self.ok()
         self.check_thresholds(block_imbalance)
         if self.verbose:
             self.msg += ' (min blocks = {0}, max blocks = {1})'.format(
                 min_blocks, max_blocks)
         self.msg += " | block_imbalance={0}%".format(block_imbalance)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_blocks={0}".format(min_blocks)
         self.msg += " max_blocks={0}".format(max_blocks)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
コード例 #33
0
 def run(self):
     """Check HDFS rack awareness: warn on fewer than 2 racks or on nodes
     still sitting in '/default-rack', and emit rack/node perfdata."""
     racks = self.get_rack_info()
     num_racks = len(racks)
     self.msg = '{} rack{} configured'.format(num_racks, plural(num_racks))
     if num_racks < 2:
         # a single rack gives no rack-level resilience
         self.warning()
         self.msg += ' (no rack resilience!)'
     default_rack = '/default-rack'
     num_nodes_left_in_default_rack = 0
     if default_rack in racks:
         self.warning()
         default_rack_nodes = racks[default_rack]
         num_nodes_left_in_default_rack = len(default_rack_nodes)
         prefix = "{num} node{plural} left in '{default_rack}'!"\
                  .format(num=num_nodes_left_in_default_rack,
                          plural=plural(num_nodes_left_in_default_rack),
                          default_rack=default_rack)
         if self.verbose:
             prefix += ' [{}]'.format(', '.join(default_rack_nodes))
         # prepend the default-rack warning to the existing message
         self.msg = prefix + ' - ' + self.msg
     self.msg += ' | hdfs_racks={};2 nodes_in_default_rack={};0 query_time={:.2f}s'\
                 .format(num_racks, num_nodes_left_in_default_rack, self.query_time)
コード例 #34
0
 def run(self):
     """Check HDFS rack awareness: warn on fewer than 2 racks or on nodes
     still sitting in '/default-rack', and emit rack/node perfdata."""
     racks = self.get_rack_info()
     num_racks = len(racks)
     self.msg = '{} rack{} configured'.format(num_racks, plural(num_racks))
     if num_racks < 2:
         # a single rack gives no rack-level resilience
         self.warning()
         self.msg += ' (no rack resilience!)'
     default_rack = '/default-rack'
     num_nodes_left_in_default_rack = 0
     if default_rack in racks:
         # nodes in the default rack have not been assigned a real rack mapping
         self.warning()
         num_nodes_left_in_default_rack = len(racks[default_rack])
         msg = "{num} node{plural} left in '{default_rack}'!"\
               .format(num=num_nodes_left_in_default_rack,
                       plural=plural(num_nodes_left_in_default_rack),
                       default_rack=default_rack)
         if self.verbose:
             msg += ' [{}]'.format(', '.join(racks[default_rack]))
         # prepend the default-rack warning to the existing message
         self.msg = msg + ' - ' + self.msg
     self.msg += ' | hdfs_racks={};2 nodes_in_default_rack={};0 query_time={:.2f}s'\
                 .format(num_racks, num_nodes_left_in_default_rack, self.query_time)
コード例 #35
0
 def end(self):
     """Finalise the check: build status message + perfdata and exit via qquit."""
     if self.node_count is None:
         raise UnknownError('node count is not set!')
     count = self.node_count
     self.msg = '{0} {1}{2} {3}'.format(count, self.agent_name, plural(count), self.state)
     self.check_thresholds(name=self.agent_name, result=count)
     if self.additional_info:
         self.msg += ', {0}'.format(self.additional_info)
     # perfdata label is '<agent>s_<state>' followed by the count and threshold ranges
     self.msg += ' | {0}s_{1}={2:d}{3}'.format(self.agent_name, self.state, count,
                                               self.get_perf_thresholds())
     if self.additional_perfdata:
         self.msg += ' {0}'.format(self.additional_perfdata)
     qquit(self.status, self.msg)
コード例 #36
0
 def end(self):
     """Finalise the check: build status message + perfdata and exit via qquit."""
     if self.node_count is None:
         raise UnknownError('node count is not set!')
     self.msg = '{0} {1}{2} {3}'.format(self.node_count, self.agent_name, plural(self.node_count), self.state)
     self.check_thresholds(self.node_count)
     if self.additional_info:
         self.msg += ', {0}'.format(self.additional_info)
     # perfdata: '<agent>s_<state>'=<count> with no unit suffix - node counts
     # are not seconds (a stray 's' after {2:d} previously mislabelled the UOM)
     self.msg += ' | {0}s_{1}={2:d}{3}'.format(self.agent_name, self.state,
                                               self.node_count, self.get_perf_thresholds())
     if self.additional_perfdata:
         self.msg += ' {0}'.format(self.additional_perfdata)
     qquit(self.status, self.msg)
コード例 #37
0
 def check_last_ingest_age(self, ingestion_date, max_age):
     """Check how long ago the last ingest started.

     :param ingestion_date: start date of the last ingest (passed to get_timedelta)
     :param max_age: warn if the ingest started more than this many minutes ago
     :returns: the age as a datetime.timedelta
     """
     log.info('checking last ingest age')
     age_timedelta = self.get_timedelta(ingestion_date=ingestion_date)
     # use total_seconds(), not .seconds - the latter is only the seconds
     # component (0-86399) and silently discards whole days, which would
     # mask ingests that are more than a day overdue
     age_secs = age_timedelta.total_seconds()
     if self.verbose:
         self.msg += ", last ingest start date = '{ingestion_date}'".format(
             ingestion_date=ingestion_date)
         self.msg += ', started {0} ago'.format(sec2human(age_secs))
     if max_age is not None and age_secs > (max_age * 60.0):
         self.warning()
         self.msg += ' (last run started more than {0} min{1} ago!)'.format(
             str(max_age).rstrip('0').rstrip('.'), plural(max_age))
     return age_timedelta
コード例 #38
0
 def check_statuses(self, results):
     """Tally ingestion result statuses and escalate plugin state accordingly.

     Known statuses from the docs: SUCCESS / INGESTION FAILED / WORKFLOW FAILED
     / INCOMPLETE. Goes WARNING if no SUCCESS at all, CRITICAL on any status
     other than SUCCESS / INCOMPLETE. Returns the status -> count tally.
     """
     log.info('checking statuses')
     num_results = len(results)
     result_statuses = {}
     for item in results:
         status = item['status']
         result_statuses[status] = result_statuses.get(status, 0) + 1
     if not result_statuses:
         code_error('no ingestion status results parsed')
     if 'SUCCESS' not in result_statuses:
         self.msg += 'NO SUCCESSFUL INGESTS in history of last {0} ingest runs! '.format(num_results)
         self.warning()
     self.msg += 'ingestion{0} status: '.format(plural(num_results))
     for status in result_statuses:
         count = result_statuses[status]
         if status not in ('SUCCESS', 'INCOMPLETE'):
             self.critical()
         self.msg += '{0} = {1} time{2}, '.format(status, count, plural(count))
     # drop the trailing ', ' left by the loop
     self.msg = self.msg.rstrip(', ')
     return result_statuses
コード例 #39
0
 def parse_json(self, json_data):
     """Check a specific datanode's last contact time with the NameNode.

     Scans the LiveNodes / DecomNodes / DeadNodes beans (each a JSON-encoded
     string inside the JMX response) for the target datanode: WARNING if it is
     decommissioning, CRITICAL if dead, UNKNOWN if absent from all three lists,
     then applies thresholds to the last-contact seconds and emits perfdata.
     """
     log.info('parsing response')
     try:
         live_nodes_str = json_data['beans'][0]['LiveNodes']
         dead_nodes_str = json_data['beans'][0]['DeadNodes']
         decom_nodes_str = json_data['beans'][0]['DecomNodes']
         live_nodes = json.loads(live_nodes_str)
         dead_nodes = json.loads(dead_nodes_str)
         decom_nodes = json.loads(decom_nodes_str)
         self.print_nodes(live_nodes=live_nodes,
                          dead_nodes=dead_nodes,
                          decom_nodes=decom_nodes)
         last_contact_secs = None
         for item in live_nodes:
             if self.match_datanode(self.datanode, item):
                 last_contact_secs = live_nodes[item]['lastContact']
         # always check decom and dead nodes regardless if last_contact_secs was found in live nodes
         # gives an additional safety check to escalate to warning / critical
         self.msg = ''
         for item in decom_nodes:
             if self.match_datanode(self.datanode, item):
                 last_contact_secs = decom_nodes[item]['lastContact']
                 self.warning()
                 self.msg = 'Decommissioning '
         for item in dead_nodes:
             if self.match_datanode(self.datanode, item):
                 last_contact_secs = dead_nodes[item]['lastContact']
                 self.critical()
                 self.msg = 'Dead '
         if last_contact_secs is None:
             raise UnknownError("datanode '{0}' is not present in any of the live, ".format(self.datanode) + \
                                "decommissioning or dead node lists!")
         if not isInt(last_contact_secs):
             raise UnknownError("non-integer '{0}' returned for last contact seconds by namenode '{1}:{2}'"\
                                .format(last_contact_secs, self.host, self.port))
         last_contact_secs = int(last_contact_secs)
         if last_contact_secs < 0:
             # negative contact age would indicate a clock / API problem
             raise UnknownError(
                 'last_contact_secs {} < 0!'.format(last_contact_secs))
         self.msg += "HDFS datanode '{0}' last contact with namenode was {1} sec{2} ago"\
                    .format(self.datanode, last_contact_secs, plural(last_contact_secs))
         self.check_thresholds(last_contact_secs)
         self.msg += ' | datanode_last_contact_secs={0}'.format(
             last_contact_secs)
         self.msg += self.get_perf_thresholds()
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
コード例 #40
0
 def parse_json(self, json_data):
     """Calculate HDFS used-space imbalance % across datanodes from NameNode JMX.

     Parses the 'LiveNodes' bean (a JSON-encoded string), finds min/max used
     space across live datanodes, computes (max - min) / max * 100 as the
     imbalance percentage, applies thresholds and assembles self.msg with
     perfdata. In verbose non-OK states also lists the imbalanced nodes.
     """
     log.info('parsing response')
     try:
         live_nodes = json_data['beans'][0]['LiveNodes']
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                 .format(self.host, self.port))
         min_space = None
         max_space = 0
         for datanode in live_node_data:
             used_space = live_node_data[datanode]['usedSpace']
             if not isInt(used_space):
                 raise UnknownError('usedSpace {} is not an integer! {}'.format(used_space, support_msg_api()))
             used_space = int(used_space)
             log.info("datanode '%s' used space = %s", datanode, used_space)
             if min_space is None or used_space < min_space:
                 min_space = used_space
             if used_space > max_space:
                 max_space = used_space
         divisor = max_space
         if divisor < 1:
             # avoid division by zero when no space is used anywhere
             log.info('min used space < 1, resetting divisor to 1 (% will likely be very high)')
             divisor = 1
         if max_space < min_space:
             raise UnknownError('max_space < min_space')
         # round to 2 decimal places via format-then-parse
         largest_imbalance_pc = float('{0:.2f}'.format(((max_space - min_space) / divisor) * 100))
         if largest_imbalance_pc < 0:
             raise UnknownError('largest_imbalance_pc < 0')
         self.ok()
         self.msg = '{0}% HDFS imbalance on space used'.format(largest_imbalance_pc)
         self.check_thresholds(largest_imbalance_pc)
         self.msg += ' across {0:d} datanode{1}'.format(num_datanodes, plural(num_datanodes))
         if self.verbose:
             self.msg += ', min used space = {0}, max used space = {1}'.format(min_space, max_space)
         if self.verbose and (self.is_warning() or self.is_critical()):
             self.msg += ' [imbalanced nodes: '
             for datanode in live_node_data:
                 used_space = live_node_data[datanode]['usedSpace']
                 used_pc = used_space / max_space * 100
                 if used_pc > self.thresholds['warning']['upper']:
                     # '{1:.2f%}' was an invalid format spec (ValueError at
                     # runtime) and formatted the raw byte count - show the
                     # node's used-space percentage with a valid spec instead
                     self.msg += '{0}({1:.2f}%),'.format(datanode, used_pc)
             self.msg = self.msg.rstrip(',') + ']'
         self.msg += " | 'HDFS imbalance on space used %'={0}".format(largest_imbalance_pc)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_used_space={0}".format(min_space)
         self.msg += " max_used_space={0}".format(max_space)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
コード例 #41
0
    def run(self):
        """Connect to Jenkins, count installed plugins and raise WARNING if any have updates available."""
        url = '{proto}://{host}:{port}'.format(proto=self.protocol,
                                               host=self.host,
                                               port=self.port)
        try:
            log.debug('setting up Jenkins connection to %s', url)
            start = time.time()
            conn = jenkins.Jenkins(url,
                                   username=self.user,
                                   password=self.password,
                                   timeout=self.timeout / 3)
            if log.isEnabledFor(logging.DEBUG):
                log.debug('getting user')
                whoami = conn.get_whoami()
                log.debug('connected as user %s', jsonpp(whoami))
            log.debug('getting plugin info')
            #plugins = conn.get_plugins()
            # deprecated but .get_plugins() output is not JSON serializable
            # so must use old deprecated method get_plugins_info() :-/
            plugins = conn.get_plugins_info()
            query_time = time.time() - start
        except jenkins.JenkinsException as _:
            raise CriticalError(_)

        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s', jsonpp(plugins))
        plugin_count = len(plugins)
        # count how many installed plugins report a pending update
        update_count = sum(1 for _plugin in plugins if _plugin['hasUpdate'])
        self.msg += " {0} plugin update{1} available out of {2} installed plugin{3}".format(
            update_count, plural(update_count), plugin_count,
            plural(plugin_count))
        if update_count:
            self.warning()
        self.msg += ' | updates_available={0};1 plugins_installed={1} query_time={2:.4f}s'.format(
            update_count, plugin_count, query_time)
コード例 #42
0
 def run(self):
     """Check the AWS root account: WARNING if MFA is disabled or any access keys exist."""
     iam = boto3.client('iam')
     log.info('getting account summary')
     summary_response = iam.get_account_summary()
     log.debug('%s', jsonpp(summary_response))
     summary = summary_response['SummaryMap']
     mfa_enabled = summary['AccountMFAEnabled']
     access_keys = summary['AccountAccessKeysPresent']
     # either condition is a policy violation for the root account
     if access_keys or not mfa_enabled:
         self.warning()
     self.msg = 'AWS root account MFA enabled = {}{}'.format(
         bool(mfa_enabled), '' if mfa_enabled else ' (!)')
     self.msg += ', {} access key{} found{}'.format(
         access_keys, plural(access_keys), ' (!)' if access_keys else '')
コード例 #43
0
 def parse_json(self, json_data):
     """Parse NameNode JMX output and report how long ago the datanode last contacted the namenode.

     Escalates to WARNING if the datanode appears in the decommissioning list
     and CRITICAL if it appears in the dead list; raises UnknownError if the
     datanode is not found anywhere or the JSON cannot be parsed.
     """
     log.info('parsing response')
     try:
         bean = json_data['beans'][0]
         live_nodes = json.loads(bean['LiveNodes'])
         dead_nodes = json.loads(bean['DeadNodes'])
         decom_nodes = json.loads(bean['DecomNodes'])
         self.print_nodes(live_nodes=live_nodes,
                          dead_nodes=dead_nodes,
                          decom_nodes=decom_nodes)
         last_contact_secs = None
         for node in live_nodes:
             if self.match_datanode(self.datanode, node):
                 last_contact_secs = live_nodes[node]['lastContact']
         # always check decom and dead nodes regardless if last_contact_secs was found in live nodes
         # gives an additional safety check to escalate to warning / critical
         self.msg = ''
         for node in decom_nodes:
             if self.match_datanode(self.datanode, node):
                 last_contact_secs = decom_nodes[node]['lastContact']
                 self.warning()
                 self.msg = 'Decommissioning '
         for node in dead_nodes:
             if self.match_datanode(self.datanode, node):
                 last_contact_secs = dead_nodes[node]['lastContact']
                 self.critical()
                 self.msg = 'Dead '
         if last_contact_secs is None:
             raise UnknownError("datanode '{0}' is not present in any of the live, ".format(self.datanode) +
                                "decommissioning or dead node lists!")
         if not isInt(last_contact_secs):
             raise UnknownError("non-integer '{0}' returned for last contact seconds by namenode '{1}:{2}'"
                                .format(last_contact_secs, self.host, self.port))
         last_contact_secs = int(last_contact_secs)
         if last_contact_secs < 0:
             raise UnknownError('last_contact_secs {} < 0!'.format(last_contact_secs))
         self.msg += "HDFS datanode '{0}' last contact with namenode was {1} sec{2} ago"\
                    .format(self.datanode, last_contact_secs, plural(last_contact_secs))
         self.check_thresholds(last_contact_secs)
         self.msg += ' | datanode_last_contact_secs={0}'.format(last_contact_secs)
         self.msg += self.get_perf_thresholds()
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"
                            .format(self.host, self.port, _))
コード例 #44
0
 def check_ingestion(self, num, filter_opts=None, max_age=None, max_runtime=None):
     """Check the most recent ingestion history results.

     Fetches up to ``num`` ingestions (optionally filtered by ``filter_opts``),
     runs status / age / runtime checks and appends human-readable details
     plus perfdata to self.msg.

     :param num: number of most recent ingestions to fetch
     :param filter_opts: optional dict of filter key=value pairs
                         (NOTE(review): if verbose is on this is iterated
                         unconditionally - assumes callers pass a dict, not
                         None, when verbose - confirm against callers)
     :param max_age: max age threshold for the last ingest
                     (NOTE(review): perfdata below scales by 3600 but
                     check_last_ingest_age compares against max_age * 60 -
                     confirm intended units)
     :param max_runtime: max runtime threshold for incomplete ingests, minutes
     """
     log.info('checking ingestion history')
     json_dict = self.get_ingestions(num, filter_opts)
     info = ''
     if self.verbose:
         # summarize the filters used for inclusion in output messages
         for key in sorted(filter_opts):
             info += " {0}='{1}'".format(key, filter_opts[key])
     try:
         results = json_dict['result']
         if not results:
             qquit('CRITICAL', "no results found for ingestion{0}"\
                   .format('{0}. {1}'.format(info, self.extract_response_message(json_dict)) + \
                   'Perhaps you specified incorrect filters? Use --list to see existing ingestions'))
         num_results = len(results)
         log.info('%s ingestion history results returned', num_results)
         # sets self.msg and escalates state based on individual ingest statuses
         self.check_statuses(results)
         if num:
             self.msg += ' out of last {0} ingest{1}'.format(num_results, plural(num_results))
         if self.history_mins:
             # strip trailing zeros / dot so e.g. 30.0 renders as '30'
             self.msg += ' within last {0} ({1} min{2})'.format(sec2human(self.history_mins * 60),
                                                                str(self.history_mins).rstrip('0').rstrip('.'),
                                                                plural(self.history_mins))
         longest_incomplete_timedelta = self.check_longest_incomplete_ingest(results, max_runtime)
         age_timedelta_secs = self.check_last_ingest_age(results, max_age=max_age)
         self.msg_filter_details(filter_opts=filter_opts)
         # perfdata section: value;warning-threshold (blank threshold if no max_age)
         self.msg += ' |'
         self.msg += ' last_ingest_age={0}s;{1}'.format(age_timedelta_secs,
                                                        max_age * 3600 if max_age else '')
         self.msg += ' longest_incomplete_ingest_age={0}s;{1}'\
                     .format(self.timedelta_seconds(longest_incomplete_timedelta)
                             if longest_incomplete_timedelta else 0,
                             max_age * 3600 if max_age else '')
         self.msg += ' auth_time={auth_time}s query_time={query_time}s'.format(auth_time=self.auth_time,
                                                                               query_time=self.query_time)
     except KeyError as _:
         qquit('UNKNOWN', 'error parsing workflow execution history: {0}'.format(_))
コード例 #45
0
 def run(self):
     """Check a local Git checkout for changed, untracked and staged-but-uncommitted files.

     CRITICAL if the working tree has modifications / untracked files or if
     the index holds uncommitted staged changes; emits perfdata counters.
     """
     self.no_args()
     directory = self.get_opt('directory')
     validate_directory(directory)
     directory = os.path.abspath(directory)
     try:
         repo = git.Repo(directory)
     except InvalidGitRepositoryError as _:
         raise CriticalError("directory '{}' does not contain a valid Git repository!".format(directory))
     try:
         untracked_files = repo.untracked_files
         num_untracked_files = len(untracked_files)
         # unstaged modifications, excluding anything that is merely untracked
         changed_files = [_diff.a_path for _diff in repo.index.diff(None)]
         changed_files = [_path for _path in changed_files if _path not in untracked_files]
         num_changed_files = len(changed_files)
     except (InvalidGitRepositoryError, TypeError) as _:
         raise CriticalError(_)
     self.msg = '{} changed file{}'.format(num_changed_files, plural(num_changed_files))
     self.msg += ', {} untracked file{}'.format(num_untracked_files, plural(num_untracked_files))
     self.msg += " in Git checkout at directory '{}'".format(directory)
     uncommitted_staged_changes = 0
     if changed_files or untracked_files:
         self.critical()
         if self.verbose:
             if changed_files:
                 self.msg += ' (changed files: {})'.format(', '.join(changed_files))
             if untracked_files:
                 self.msg += ' (untracked files: {})'.format(', '.join(untracked_files))
     elif repo.is_dirty():
         # working tree clean but the index differs from HEAD
         self.msg += ', uncommitted staged changes detected!'
         self.critical()
         uncommitted_staged_changes = 1
     self.msg += ' | changed_files={};0;0 untracked_files={};0;0'.format(num_changed_files, num_untracked_files)
     self.msg += ' uncommitted_staged_changes={};0;0'.format(uncommitted_staged_changes)
コード例 #46
0
 def parse_json(self, json_data):
     """Parse HiveServer2 Interactive LLAP status JSON, checking peer count and optional regex match."""
     dynamic = self.get_key(json_data, 'dynamic')
     peers = self.get_key(json_data, 'peers')
     if not isList(peers):
         raise UnknownError("'peers' field is not a list as expected! {0}".format(support_msg_api()))
     peer_count = len(peers)
     if self.regex:
         # case-insensitive match against peer entries
         compiled_regex = re.compile(self.regex, re.I)
         if not self.find_peer(compiled_regex, peers):
             self.msg += "no peer found matching '{0}', ".format(self.regex)
             self.critical()
     self.msg += '{0} peer{1} found'.format(peer_count, plural(peer_count))
     self.check_thresholds(peer_count)
     self.msg += ', dynamic = {0}'.format(dynamic)
     perfdata = ' | hiveserver2_llap_peers={0}{1}'.format(peer_count, self.get_perf_thresholds(boundary='lower'))
     self.msg += perfdata
コード例 #47
0
 def parse_json(self, json_data):
     """Parse NameNode JMX LiveNodes output and report the % block imbalance across datanodes.

     Imbalance is (max_blocks - min_blocks) / min_blocks * 100, with the
     divisor reset to 1 when min_blocks < 1 to avoid division by zero.

     :raises CriticalError: if no live datanodes are returned
     :raises UnknownError: on missing keys, non-integer counts or invalid JSON
     """
     log.info('parsing response')
     try:
         live_nodes = json_data['beans'][0]['LiveNodes']
         live_node_data = json.loads(live_nodes)
         num_datanodes = len(live_node_data)
         if num_datanodes < 1:
             raise CriticalError("no live datanodes returned by JMX API from namenode '{0}:{1}'"\
                                 .format(self.host, self.port))
         max_blocks = 0
         min_blocks = None
         for datanode in live_node_data:
             blocks = live_node_data[datanode]['numBlocks']
             if not isInt(blocks):
                 raise UnknownError('numBlocks {} is not an integer! {}'.format(blocks, support_msg_api()))
             blocks = int(blocks)
             log.info("datanode '%s' has %s blocks", datanode, blocks)
             if blocks > max_blocks:
                 max_blocks = blocks
             if min_blocks is None or blocks < min_blocks:
                 min_blocks = blocks
         log.info("max blocks on a single datanode = %s", max_blocks)
         log.info("min blocks on a single datanode = %s", min_blocks)
         if min_blocks is None:
             raise UnknownError('min_blocks is None')
         divisor = min_blocks
         if min_blocks < 1:
             log.info("min blocks < 1, resetting divisor to 1 (% will be very high)")
             divisor = 1
         # float(divisor) guards against integer division truncating the ratio
         # to 0 under Python 2 (no-op under Python 3 true division)
         block_imbalance = float("{0:.2f}".format((max_blocks - min_blocks) / float(divisor) * 100))
         self.msg = '{0}% block imbalance across {1} datanode{2}'\
                    .format(block_imbalance, num_datanodes, plural(num_datanodes))
         self.ok()
         self.check_thresholds(block_imbalance)
         if self.verbose:
             self.msg += ' (min blocks = {0}, max blocks = {1})'.format(min_blocks, max_blocks)
         self.msg += " | block_imbalance={0}%".format(block_imbalance)
         self.msg += self.get_perf_thresholds()
         self.msg += " num_datanodes={0}".format(num_datanodes)
         self.msg += " min_blocks={0}".format(min_blocks)
         self.msg += " max_blocks={0}".format(max_blocks)
     except KeyError as _:
         raise UnknownError("failed to parse json returned by NameNode at '{0}:{1}': {2}. {3}"\
                            .format(self.host, self.port, _, support_msg_api()))
     except ValueError as _:
         raise UnknownError("invalid json returned for LiveNodes by Namenode '{0}:{1}': {2}"\
                            .format(self.host, self.port, _))
コード例 #48
0
 def check_missing_traits(self, traits):
     """Return the expected traits missing from *traits*, flagging CRITICAL if any.

     :param traits: list of traits attached to the entity
     :returns: list of expected traits not present (empty list if none missing)
     :raises UnknownError: if *traits* is not a list
     """
     if not isList(traits):
         raise UnknownError('traits non-list returned. {0}'.format(support_msg_api()))
     if self.traits:
         missing_traits = []
         #traits = [t.lower() for t in traits]
         for trait in self.traits:
             #if trait.lower() not in traits:
             if trait not in traits:
                 missing_traits.append(trait)
         if missing_traits:
             self.critical()
             # pluralize on the number actually missing (listed in the message),
             # not on the number of expected traits
             self.msg += " (expected trait{plural} '{missing_traits}' not found in entity)".format(
                 missing_traits=','.join(missing_traits),
                 plural=plural(missing_traits))
             return missing_traits
     return []
コード例 #49
0
    def check_table_regions(self):
        """Check the regions of the configured HBase table via Thrift.

        Checks region count against thresholds and flags WARNING if any region
        is unassigned to a server; appends perfdata to self.msg. Quits
        CRITICAL if the table doesn't exist or on connection errors.
        """
        log.info('checking regions for table \'%s\'', self.table)
        regions = None
        try:
            table = self.conn.table(self.table)
            regions = table.regions()
        except HBaseIOError as _:
            #if 'org.apache.hadoop.hbase.TableNotFoundException' in str(_):
            # str(_) rather than _.message: exceptions have no .message attribute on Python 3
            if 'TableNotFoundException' in str(_):
                qquit('CRITICAL', 'table \'{0}\' does not exist'.format(self.table))
            else:
                qquit('CRITICAL', _)
        except (socket.error, socket.timeout, ThriftException) as _:
            qquit('CRITICAL', _)

        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s', jsonpp(regions))
        if not regions:
            qquit('CRITICAL', 'failed to get regions for table \'{0}\''.format(self.table))
        if not isList(regions):
            qquit('UNKNOWN', 'region info returned is not a list! ' + support_msg_api())
        num_regions = len(regions)
        log.info('num regions: %s', num_regions)

        self.msg = 'HBase table \'{0}\' has {1} region{2}'.format(self.table, num_regions, plural(num_regions))
        self.check_thresholds(num_regions)

        num_unassigned_regions = 0
        for region in regions:
            try:
                if not region['server_name']:
                    #log.debug('region \'%s\' is not assigned to any server', region['name'])
                    num_unassigned_regions += 1
            except KeyError as _:
                qquit('UNKNOWN', 'failed to find server assigned to region. ' + support_msg_api())
        log.info('num unassigned regions: %s', num_unassigned_regions)
        self.msg += ', {0} unassigned region{1}'.format(num_unassigned_regions, plural(num_unassigned_regions))
        if num_unassigned_regions > 0:
            self.warning()
            self.msg += '!'

        self.msg += ' |'
        self.msg += ' num_regions={0}'.format(num_regions) + self.get_perf_thresholds(boundary='lower')
        self.msg += ' num_unassigned_regions={0};1;0'.format(num_unassigned_regions)
        log.info('finished, closing connection')
        self.conn.close()
コード例 #50
0
 def check_missing_tags(self, tags):
     """Return the expected tags missing from *tags*, flagging CRITICAL if any.

     :param tags: list of tags attached to the entity
     :returns: list of expected tags not present (empty list if none missing)
     :raises UnknownError: if *tags* is not a list
     """
     if not isList(tags):
         raise UnknownError('tags non-list returned. {0}'.format(support_msg_api()))
     if self.tags:
         missing_tags = []
         #tags = [t.lower() for t in tags]
         for tag in self.tags:
             #if tag.lower() not in tags:
             if tag not in tags:
                 missing_tags.append(tag)
         if missing_tags:
             self.critical()
             # pluralize on the number actually missing (listed in the message),
             # not on the number of expected tags
             self.msg += " (expected tag{plural} '{missing_tags}' not found in entity)".format(
                 missing_tags=','.join(missing_tags),
                 plural=plural(missing_tags))
             return missing_tags
     return []
コード例 #51
0
 def run(self):
     """Scrape the software's /admin/info page and check license days until expiry against thresholds.

     CRITICAL if the license has already expired; perfdata includes
     days_until_expiry with a lower boundary.
     """
     log.info('querying %s', self.software)
     url = '{protocol}://{host}:{port}/admin/info'\
           .format(host=self.host, port=self.port, protocol=self.protocol)
     log.debug('GET %s', url)
     try:
         req = requests.get(url)
         #req = requests.get(url, auth=HTTPBasicAuth(self.user, self.password))
     except requests.exceptions.RequestException as _:
         errhint = ''
         # str(_) rather than _.message: exceptions have no .message attribute on Python 3
         if 'BadStatusLine' in str(_):
             errhint = ' (possibly connecting to an SSL secured port without using --ssl?)'
         elif self.protocol == 'https' and 'unknown protocol' in str(_):
             errhint = ' (possibly connecting to a plain HTTP port with the -S / --ssl switch enabled?)'
         qquit('CRITICAL', str(_) + errhint)
     log.debug("response: %s %s", req.status_code, req.reason)
     log.debug("content:\n%s\n%s\n%s", '='*80, req.content.strip(), '='*80)
     if req.status_code != 200:
         qquit('CRITICAL', '{0}: {1}'.format(req.status_code, req.reason))
     soup = BeautifulSoup(req.content, 'html.parser')
     if log.isEnabledFor(logging.DEBUG):
         log.debug("BeautifulSoup prettified:\n{0}\n{1}".format(soup.prettify(), '='*80))
     try:
         license_tag = soup.find('td', {'class': 'lic-value'})
         if not license_tag:
             qquit('UNKNOWN', 'failed to find license tag while parsing')
         expiry = license_tag.text.strip()
         license_datetime = datetime.strptime(expiry, '%Y-%m-%d %H:%M:%S')
         delta = license_datetime - datetime.now()
         days = delta.days
         if days < 0:
             qquit('CRITICAL', "license has already expired on '{0}'".format(expiry))
         self.msg = "{software} license expires in {days} day{plural}"\
                    .format(software=self.software, days=days, plural=plural(days))
         self.check_thresholds(days)
         self.msg += ", expiry date = '{expiry}' | days_until_expiry={days}{thresholds}"\
                     .format(expiry=expiry, days=days, thresholds=self.get_perf_thresholds(boundary='lower'))
     except (AttributeError, TypeError) as _:
         qquit('UNKNOWN', 'error parsing output from {software}: {exception}: {error}. {support_msg}'\
                          .format(software=self.software,
                                  exception=type(_).__name__,
                                  error=_,
                                  support_msg=support_msg_api()))
コード例 #52
0
 def check_longest_incomplete_ingest(self, result, max_runtime=None):
     """Find the longest running INCOMPLETE ingest and warn if it exceeds max_runtime minutes.

     :param result: list of ingestion result dicts
     :param max_runtime: max allowed runtime in minutes (check skipped if None)
     :returns: timedelta of the longest incomplete ingest, or None if none found
     """
     log.info('checking longest running incomplete ingest')
     longest_incomplete_timedelta = None
     for item in result:
         status = item['status']
         if status == 'INCOMPLETE' and max_runtime is not None:
             runtime_delta = self.get_timedelta(item['ingestionTimeFormatted'])
             if longest_incomplete_timedelta is None or \
                self.timedelta_seconds(runtime_delta) > self.timedelta_seconds(longest_incomplete_timedelta):
                 longest_incomplete_timedelta = runtime_delta
     # compare in seconds: timedelta_seconds() (not the raw timedelta) against
     # max_runtime minutes * 60, consistent with the comparisons above
     if max_runtime is not None and \
        longest_incomplete_timedelta is not None and \
        self.timedelta_seconds(longest_incomplete_timedelta) > max_runtime * 60.0:
         self.warning()
         self.msg += ', longest incomplete ingest runtime = {0} ago! '\
                     .format(sec2human(self.timedelta_seconds(longest_incomplete_timedelta))) + \
                     '(greater than expected {0} min{1})'\
                     .format(str(max_runtime).rstrip('0').rstrip('.'), plural(max_runtime))
     return longest_incomplete_timedelta
コード例 #53
0
 def check_last_ingest_age(self, results, max_age):
     """Check how long ago the most recent ingest started; WARNING if older than max_age minutes.

     :param results: list of ingestion result dicts, newest first
     :param max_age: max age in minutes, or None to skip the threshold check
     :returns: age of the last ingest in seconds
     """
     log.info('checking last ingest age')
     if not isList(results):
         code_error('passed non-list to check_last_ingest_age()')
     # newest is first
     # effectiveDate is null in testing (docs says it's a placeholder for future use)
     # using ingestionTimeFormatted instead, could also use ingestionTime which is timestamp in millis
     ingestion_date = results[0]['ingestionTimeFormatted']
     age_secs = self.timedelta_seconds(self.get_timedelta(ingestion_date=ingestion_date))
     if self.verbose:
         self.msg += ", last ingest start date = '{ingestion_date}'".format(ingestion_date=ingestion_date)
         self.msg += ', started {0} ago'.format(sec2human(age_secs))
     if max_age is not None and age_secs > max_age * 60.0:
         self.warning()
         # strip trailing zeros / dot so e.g. 30.0 renders as '30'
         self.msg += ' (last run started more than {0} min{1} ago!)'.format(
             str(max_age).rstrip('0').rstrip('.'), plural(max_age))
     return age_secs
コード例 #54
0
 def run(self):
     """Page through Docker Hub build history until a completed build is found, else raise UnknownError."""
     start_time = time.time()
     for page_num in range(1, self.max_pages + 1):
         url = 'https://registry.hub.docker.com/v2/repositories/{repo}/buildhistory?page={page}'\
               .format(repo=self.repo, page=page_num)
         req = self.request.get(url)
         if log.isEnabledFor(logging.DEBUG):
             log.debug(jsonpp(req.content))
         json_data = json.loads(req.content)
         log.debug('%s out of %s results returned for page %s',
                   len(json_data['results']), json_data['count'], page_num)
         if self.process_results(json_data):
             # not quite as accurate as before as it now includes processing time but close enough
             query_time = time.time() - start_time
             if '|' not in self.msg:
                 self.msg += ' |'
             self.msg += ' query_time={0:.2f}s'.format(query_time)
             return True
     extra_info = ''
     if self.verbose:
         extra_info = ' ({0} page{1} of API output)'.format(self.max_pages, plural(self.max_pages))
     raise UnknownError('no completed builds found in last {0} builds{1}'.format(self.max_pages * 10, extra_info))
コード例 #55
0
ファイル: timeout.py プロジェクト: HariSekhon/pytools
 def timeout_handler(self, signum, frame):  # pylint: disable=unused-argument
     """Self-timeout signal handler: kill all child processes, pause briefly, then quit UNKNOWN."""
     for child_proc in psutil.Process().children():
         child_proc.kill()
     time.sleep(1)
     timeout_msg = "self timed out after %d second%s" % (self.timeout, plural(self.timeout))
     qquit("UNKNOWN", timeout_msg)
コード例 #56
0
    def parse_results(self, content):
        build = self.get_latest_build(content)

        number = build['number']
        log.info('build number = %s', number)
        if not isInt(number):
            raise UnknownError('build number returned is not an integer!')

        message = build['message']
        log.info('message = %s', message)

        branch = build['branch']
        log.info('branch = %s', branch)

        commit = build['commit']
        log.info('commit = %s', commit)

        started_at = build['started_at']
        log.info('started_at  = %s', started_at)

        finished_at = build['finished_at']
        log.info('finished_at = %s', finished_at)

        duration = build['duration']
        log.info('duration = %s', duration)
        if not isInt(duration):
            raise UnknownError('duration returned is not an integer!')

        repository_id = build['repository_id']
        log.info('repository_id = %s', repository_id)
        if not isInt(repository_id):
            raise UnknownError('repository_id returned is not an integer!')

        result = build['result']
        log.info('result = %s', result)

        state = build['state']
        log.info('state = %s', state)

        if result == 0:
            self.ok()
            status = "PASSED"
        else:
            self.critical()
            status = "FAILED"

        self.msg = "Travis CI build #{number} {status} for repo '{repo}' in {duration} secs".format(\
                               number=number, status=status, repo=self.repo, duration=duration)
        self.check_thresholds(duration)
        self.msg += ", started_at='{0}'".format(started_at)
        self.msg += ", finished_at='{0}'".format(finished_at)

        if self.verbose:
            self.msg += ", message='{0}'".format(message)
            self.msg += ", branch='{0}'".format(branch)
            self.msg += ", commit='{0}'".format(commit)
            self.msg += ", repository_id='{0}'".format(repository_id)

        if self.verbose or self.builds_in_progress > 0:
            self.msg += ", {0} build{1} in progress".format(self.builds_in_progress, plural(self.builds_in_progress))
        self.msg += " | last_build_duration={duration}s{perf_thresholds} num_builds_in_progress={builds_in_progress}"\
                    .format(duration=duration,
                            perf_thresholds=self.get_perf_thresholds(),
                            builds_in_progress=self.builds_in_progress)
コード例 #57
0
    def run(self):
        """Connect to Jenkins, fetch plugin info and flag WARNING if any plugin updates are available."""
        server_url = '{proto}://{host}:{port}'.format(proto=self.protocol, host=self.host, port=self.port)
        try:
            log.debug('setting up Jenkins connection to %s', server_url)
            start_time = time.time()
            server = jenkins.Jenkins(server_url,
                                     username=self.user,
                                     password=self.password,
                                     timeout=self.timeout / 3)
            if log.isEnabledFor(logging.DEBUG):
                log.debug('getting user')
                user = server.get_whoami()
                log.debug('connected as user %s', jsonpp(user))
            log.debug('getting plugin info')
            # .get_plugins() output is not JSON serializable
            # so must use old deprecated method get_plugins_info() :-/
            #plugins = server.get_plugins()
            plugins = server.get_plugins_info()
            query_time = time.time() - start_time
        except jenkins.JenkinsException as _:
            raise CriticalError(_)

        if log.isEnabledFor(logging.DEBUG):
            log.debug('%s', jsonpp(plugins))
        plugin_count = len(plugins)
        # count installed plugins reporting a pending update
        update_count = len([_plugin for _plugin in plugins if _plugin['hasUpdate']])
        self.msg += " {0} plugin update{1} available out of {2} installed plugin{3}".format(
            update_count, plural(update_count), plugin_count, plural(plugin_count))
        if update_count:
            self.warning()
        self.msg += ' | updates_available={0};1 plugins_installed={1} query_time={2:.4f}s'.format(
            update_count, plugin_count, query_time)
コード例 #58
0
 def parse_json(self, json_data):
     if not isList(json_data):
         raise UnknownError('non-list returned by Presto for nodes failed. {0}'.format(support_msg_api()))
     num_failed_nodes = len(json_data)
     self.msg = 'Presto SQL - {0} worker node{1} failed'.format(num_failed_nodes, plural(num_failed_nodes))
     self.check_thresholds(num_failed_nodes)