def _run(self):
    """Ping ``self.host`` and report packet loss or excessive round-trip time.

    Runs the system ``ping`` command with ``self.packet_size`` (``-s``),
    ``self.count`` (``-c``) and ``self.timeout`` (``-W``), then hands the
    captured output to ``self._parse_output``.

    :return: a StatusCheckResult. ``succeeded`` is True only when ``ping``
        exited with status 0, the parsed packet loss is not above 0 and, if
        ``self.max_rtt`` is set, the parsed average RTT does not exceed it.
        On failure ``error`` holds either ping's own output (non-zero exit
        status) or a message of the form ``"<reason>, Host: <host>"``.
    """
    result = StatusCheckResult(status_check=self)
    args = [
        'ping',
        '-s', str(self.packet_size),
        '-c', str(self.count),
        '-W', str(self.timeout),
        self.host,
    ]
    try:
        # An argument list with shell=False means the host value is never
        # interpreted by a shell.
        result.raw_data = subprocess.check_output(
            args, stderr=subprocess.STDOUT, shell=False)
        r = self._parse_output(result.raw_data)
        if r['packet_loss'] > 0.0:
            raise Exception("%0.1f%% packet loss" % r['packet_loss'])
        elif self.max_rtt and r['rtt']['avg'] > self.max_rtt:
            raise Exception(
                "Maximum average RTT reached: %s" % r['rtt']['avg'])
    except subprocess.CalledProcessError as e:
        # Non-zero exit status: surface whatever ping printed.
        result.succeeded = False
        result.error = e.output
    except Exception as e:
        # Format the exception itself rather than ``e.message``: that
        # attribute does not exist on Python 3, and on Python 2 it is empty
        # for exceptions built from several arguments (e.g. OSError when the
        # ping binary is missing), which used to hide the real cause.
        result.error = u"{}, Host: {}".format(e, self.host)
        result.succeeded = False
    else:
        result.succeeded = True
    return result
def _run(self):
    """Execute ``self._check()`` and wrap its outcome in a StatusCheckResult.

    * A ``StatusGoException`` marks the result as failed and contributes both
      its ``raw_data`` and its ``message``.
    * Any other exception marks the result as failed and is reported through
      its formatted form.
    * Otherwise the value returned by ``_check`` becomes ``raw_data`` and the
      result is marked as succeeded.

    :return: the populated StatusCheckResult
    """
    outcome = StatusCheckResult(status_check=self)

    try:
        payload = self._check()
    except StatusGoException as exc:
        # The domain-specific failure carries its own raw data.
        outcome.raw_data = exc.raw_data
        outcome.error = u'Error occurred: {}'.format(exc.message)
        outcome.succeeded = False
        return outcome
    except Exception as exc:
        outcome.error = u'Error occurred: {}'.format(exc)
        outcome.succeeded = False
        return outcome

    # Reached only when ``_check`` returned normally.
    outcome.raw_data = payload
    outcome.succeeded = True
    return outcome
def _run(self):
    """Hold a short SMTP conversation and compare the final reply code.

    Connects to ``self.host``/``self.port``, sends EHLO with
    ``self.helo_address``, then ``MAIL FROM:`` when a sender is configured and
    ``RCPT TO:`` when both sender and recipient are configured.

    The check succeeds when the last code in ``sess.response_codes`` equals
    ``self.expected_code``. This comparison is also applied when the
    conversation raised an ``Exception``, so ``succeeded`` can be True while
    ``error`` is set.

    :return: a StatusCheckResult whose ``raw_data`` is the session's
        conversation joined with newlines.
    """
    result = StatusCheckResult(status_check=self)
    sess = SmtpSession()

    def last_code_matches():
        # True only if at least one reply was recorded and the most recent
        # one is the expected code.
        codes = sess.response_codes
        return len(codes) > 0 and self.expected_code == codes[-1]

    try:
        sess.connect(self.host, self.port)
        sess.ehlo(self.helo_address)
        if self.sender:
            sess.call('MAIL FROM:', self.sender)
            if self.recipient:
                sess.call('RCPT TO:', self.recipient)
    except Exception as e:
        # Format the exception itself rather than ``e.message``: that
        # attribute does not exist on Python 3 and is empty on Python 2 for
        # exceptions built from several arguments (typical for socket errors).
        result.error = u'Error occurred %s: %s' % (e.__class__.__name__, e)
        result.succeeded = last_code_matches()
    except:  # noqa: E722
        # NOTE(review): the bare except is kept from the original so that
        # anything not derived from Exception is still recorded instead of
        # propagating -- confirm this is intended.
        result.error = u'Error occurred: %s' % (sys.exc_info()[0], )
        result.succeeded = False
    else:
        result.succeeded = last_code_matches()
    finally:
        sess.quit()
        result.raw_data = "\n".join(sess.conversation)
    return result
def _run(self):
    """Look up the monitor and translate the HTTP answer into a result.

    Calls ``self.checkIfMonitorIdExists()`` and ``self.findMonitor()``, then
    branches on the response status code:

    * 401 / 404 -- failed, with a "cant find monitor" error.
    * 200 -- failed when ``monitorDetails.isDown`` is truthy, otherwise
      succeeded with ``error`` set to the string ``'None'``.
    * anything else -- reported as succeeded with an "Unexpected response!"
      note (see the review note in the body).

    Any exception raised along the way yields a failed result carrying the
    exception text in both ``error`` and ``raw_data``.

    :return: the populated StatusCheckResult
    """
    result = StatusCheckResult(status_check=self)
    try:
        self.checkIfMonitorIdExists()
        response = self.findMonitor()
        status = response.status_code

        if status in (401, 404):
            # NOTE(review): 401 reuses the "probably deleted" wording of 404;
            # confirm whether the service really answers 401 for a missing
            # monitor.
            result.error = (
                u"Cant find monitor process {} with id: {}. "
                u"Probably it was deleted."
            ).format(self.monitor_name, self.monitor_id)
            result.succeeded = False
            if status == 401:
                result.raw_data = '401 UNAUTHORIZED'
            else:
                result.raw_data = '404 NOT FOUND'
            return result

        if status == 200:
            details = response.json().get('monitorDetails')
            if details.get('isDown'):
                result.error = (
                    u"Monitor process {} is down! "
                    u"Please checkin using URL: {}"
                ).format(self.monitor_name, self.monitor_checkin)
                result.succeeded = False
                result.raw_data = self.buildRawData(details)
                return result
            result.succeeded = True
            result.error = 'None'
            result.raw_data = 'Monitor is alive!'
            return result

        # NOTE(review): an unexpected status code is still reported as a
        # success, as in the original -- confirm this is deliberate.
        result.succeeded = True
        result.error = 'Unexpected response!'
        result.raw_data = u'Response code is: {}'.format(status)
        return result
    except Exception as e:
        # Store text, not the ``e.args`` tuple: every other path assigns a
        # string to ``error`` / ``raw_data``.
        detail = u'{}'.format(e)
        result.error = detail
        result.succeeded = False
        result.raw_data = detail
        return result
def run_metrics_check(check):
    """
    Run the status check.

    Fetches the series via ``check.get_series()``, handles fetch errors and
    empty data, then compares the recent datapoints of every series against
    the high-alert and warning thresholds.

    Side effect: ``check.importance`` is overwritten when an empty-series
    WARN/FAIL rule applies or when a threshold is violated.

    :param check: the status check
    :return: a ``(StatusCheckResult, tags)`` tuple; the result carries
        success/failure/error information and ``tags`` is a list of tags
        describing what went wrong (empty when nothing did)
    """
    series = check.get_series()

    # If there was an error fetching metrics, fail immediately.
    if series['error'] is True:
        message = series.get('error_message')
        # logger.error, not logger.exception: no exception is being handled
        # here, so logger.exception would only append an empty traceback.
        logger.error('Error fetching metrics: {}: {}'.format(
            series.get('error_code'), message))
        error = 'Error fetching metric from source: {}'.format(message)
        return StatusCheckResult(status_check=check, succeeded=False,
                                 error=error), [check.tag_fetch_error]

    # If the series is empty, apply the empty-series handler.
    if series['data'] == []:
        if check.on_empty_series == defs.ON_EMPTY_SERIES_PASS:
            return StatusCheckResult(status_check=check, succeeded=True,
                                     error='SUCCESS: no data'), []
        if check.on_empty_series == defs.ON_EMPTY_SERIES_WARN:
            check.importance = Service.WARNING_STATUS
            tags = [check.tag_no_data]
            return StatusCheckResult(status_check=check, succeeded=False,
                                     error='WARNING: no data'), tags
        if check.on_empty_series == defs.ON_EMPTY_SERIES_FAIL:
            check.importance = check.high_alert_importance
            tags = [check.tag_no_data]
            return StatusCheckResult(
                status_check=check, succeeded=False,
                error='{}: no data'.format(check.importance)), tags

    # Ignore all points at or before the following start time
    # (check.time_range is multiplied by 60 to get seconds).
    start_time = time.time() - check.time_range * 60

    def filter_old_points(p):
        # p[0] is compared against start_time, i.e. treated as a timestamp.
        timestamp = p[0]
        if timestamp <= start_time:
            logger.debug('Ignoring point {} older than {}'.format(
                str(p), str(start_time)))
            return False
        return True

    parsed_series = series['data']
    logger.info('Processing series {}'.format(str(parsed_series)))

    # Order is important - most severe first, since we report the first
    # error found.
    thresholds = [
        (check.high_alert_importance, check.high_alert_value),
        (Service.WARNING_STATUS, check.warning_value),
    ]

    # Process each series, updating result and tags as we go.
    result = StatusCheckResult(status_check=check, succeeded=True)
    result.raw_data = _get_raw_data_with_thresholds(check, series)
    tags = []

    # Loop order is:
    #   (high_importance, series_1), (high_importance, series_2), ...,
    #   (warning, series_1), (warning, series_2), ...
    # and we report the first error encountered as our error
    # (but continue looping so we accumulate tags).
    for importance, threshold in thresholds:
        for series_data in parsed_series:
            series_name = series_data['series']
            datapoints = [p for p in series_data['datapoints']
                          if filter_old_points(p)]
            failing_point = _point_triggering_alert(
                datapoints, check.check_type,
                check.consecutive_failures, threshold)
            if failing_point is not None:
                tags.append(check.tag_failing(importance, series_name))
                if result.succeeded:
                    # Record the first, most severe failure.
                    result.succeeded = False
                    check.importance = importance
                    result.error = _get_error_message(
                        check, threshold, importance, series_name,
                        failing_point[1])
            logger.info(
                'Finished processing series {}'.format(series_name))

    return result, tags
def _run(self):
    """Evaluate the metric returned by ``self.parse_metric()``.

    The result fails immediately when the parsed output reports an error or
    contains no series with data. Otherwise each series with values is
    compared against ``self.value`` according to ``self.check_type``:

    * ``<`` / ``<=`` compare the series minimum,
    * ``>`` / ``>=`` compare the series maximum,
    * ``==`` tests whether ``float(self.value)`` occurs among the values.

    The check fails when the number of failing series exceeds
    ``self.allowed_num_failures`` or when fewer series have data than
    ``self.expected_num_hosts``.

    :return: the populated StatusCheckResult
    :raises Exception: if ``self.check_type`` is not one of the operators
        listed above
    """
    if not hasattr(self, 'utcnow'):
        self.utcnow = None
    result = StatusCheckResult(status_check=self)

    # NOTE: Can be added later
    # last_result = self.last_result()
    #
    # if last_result:
    #     last_result_started = last_result.time
    #     time_to_check = max(self.frequency, ((timezone.now() - last_result_started).total_seconds() / 60) + 1)
    # else:
    #     time_to_check = self.frequency

    output = self.parse_metric()
    result.raw_data = output["raw"]

    # Bail out early when the metric could not be evaluated at all.
    if output["error"]:
        result.error = output["error"]
        result.succeeded = False
        return result
    if not output["num_series_with_data"]:
        result.error = "Empty result for given metric"
        result.succeeded = False
        return result

    failures = []
    if output['num_series_with_data'] > 0:
        result.average_value = output['average_value']
        threshold = float(self.value)
        for s in output['series']:
            if not s["values"]:
                continue
            failure_value = None
            if self.check_type == '<':
                if float(s["min"]) < threshold:
                    failure_value = s["min"]
            elif self.check_type == '<=':
                if float(s["min"]) <= threshold:
                    failure_value = s["min"]
            elif self.check_type == '>':
                if float(s["max"]) > threshold:
                    failure_value = s["max"]
            elif self.check_type == '>=':
                if float(s["max"]) >= threshold:
                    failure_value = s["max"]
            elif self.check_type == '==':
                if threshold in s['values']:
                    failure_value = threshold
            else:
                raise Exception(
                    u'Check type %s not supported' % self.check_type)
            # Compare against None explicitly: a failing value of 0 is
            # falsy and used to be dropped silently.
            if failure_value is not None:
                failures.append(failure_value)

    if len(failures) > self.allowed_num_failures:
        result.succeeded = False
    elif output['num_series_with_data'] < self.expected_num_hosts:
        result.succeeded = False
    else:
        result.succeeded = True

    if not result.succeeded:
        # targets = [s["target"] for s in output["series"]]
        # hosts = minimize_targets(targets)
        # hosts_by_target = dict(zip(targets, hosts))
        result.error = self.format_error_message(
            failures,
            output['num_series_with_data']
        )
    return result