Example #1
0
    def _run(self):
        """Ping ``self.host`` and report the outcome as a StatusCheckResult.

        The check fails when the ``ping`` command exits with a non-zero
        status, when any packet loss is reported, or when ``self.max_rtt`` is
        set and the average round-trip time exceeds it.

        :return: the populated StatusCheckResult
        """
        result = StatusCheckResult(status_check=self)

        # Argument list (no shell), so the host value is never interpreted by
        # a shell.
        args = [
            'ping',
            '-s', str(self.packet_size),
            '-c', str(self.count),
            '-W', str(self.timeout),
            self.host,
        ]

        try:
            result.raw_data = subprocess.check_output(args,
                                                      stderr=subprocess.STDOUT,
                                                      shell=False)
            r = self._parse_output(result.raw_data)
            if r['packet_loss'] > 0.0:
                raise Exception("%0.1f%% packet loss" % r['packet_loss'])
            elif self.max_rtt and r['rtt']['avg'] > self.max_rtt:
                raise Exception("Maximum average RTT reached: %s" %
                                r['rtt']['avg'])
        except subprocess.CalledProcessError as e:
            # Non-zero exit status: report whatever ping printed.
            result.succeeded = False
            result.error = e.output
        except Exception as e:
            # Format the exception itself rather than ``e.message``: that
            # attribute is empty for multi-argument exceptions and does not
            # exist on Python 3.
            result.error = u"{}, Host: {}".format(e, self.host)
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #2
0
    def _run(self):
        """Check that a TCP connection to ``self.host:self.port`` can be opened.

        The connection is opened with ``self.timeout``, shut down and closed
        again; any exception along the way marks the check as failed.

        :return: the populated StatusCheckResult
        """
        result = StatusCheckResult(status_check=self)

        try:
            s = socket.create_connection((self.host, self.port), self.timeout)
            try:
                s.shutdown(socket.SHUT_RDWR)
            finally:
                # Always release the descriptor, even if shutdown() raises.
                s.close()
        except Exception as e:
            # ``e.message`` is empty for (errno, strerror) socket errors and
            # missing on Python 3, so format the exception itself.
            result.error = u'Error occurred: %s' % (e, )
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #3
0
    def _run(self):
        """Connect over TCP, optionally send a message and verify the reply.

        Steps:

        1. open a connection to ``self.host:self.port`` with ``self.timeout``;
        2. if ``self.message_to_send`` is set, send it (base64-decoded first
           when ``self.message_to_send_b64`` is true);
        3. if ``self.expected_reply`` is set, read as many bytes as the
           expected reply is long and compare them with it (base64-decoded
           first when ``self.expected_reply_b64`` is true).

        Any exception, or a reply that differs from the expected one, marks
        the check as failed.

        :return: the populated StatusCheckResult
        """
        import base64

        def _as_bytes(value, is_b64):
            # Work on a local copy: decoding must not overwrite the attribute
            # on ``self``, otherwise a later run would decode it a second time.
            if is_b64:
                return base64.b64decode(value)
            if isinstance(value, bytes):
                return value
            return value.encode('utf-8')

        result = StatusCheckResult(status_check=self)
        s = None

        try:
            s = socket.create_connection((self.host, self.port), self.timeout)

            if self.message_to_send:
                s.sendall(_as_bytes(self.message_to_send,
                                    self.message_to_send_b64))

            if self.expected_reply:
                expected = _as_bytes(self.expected_reply,
                                     self.expected_reply_b64)

                # Only read as many bytes as the expected reply is long: a
                # server's answer can be lengthy and often just its beginning
                # matters (e.g. `HTTP/1.1 200 OK`). recv() may return fewer
                # bytes than requested, so keep reading until enough data
                # arrived or the peer closed the connection.
                received = b''
                while len(received) < len(expected):
                    chunk = s.recv(len(expected) - len(received))
                    if not chunk:
                        break
                    received += chunk

                if received != expected:
                    result.error = u'Got unexpected response %r' % (
                        received, )
                    result.succeeded = False
                    return result
        except Exception as e:
            result.error = u'Error occurred: %s' % (e, )
            result.succeeded = False
        else:
            result.succeeded = True
        finally:
            # ``s`` is still None when the connection itself failed.
            if s is not None:
                try:
                    s.shutdown(socket.SHUT_RDWR)
                except socket.error:
                    # The peer may already have dropped the connection; the
                    # verdict has been decided at this point.
                    pass
                s.close()

        return result
Example #4
0
    def _run(self):
        """Fail when the certificate has expired or expires within ``self.days``.

        The remaining lifetime is ``self.ssl_expiry_datetime()`` minus the
        current UTC time. Any exception raised while determining it also
        marks the check as failed.

        :return: the populated StatusCheckResult
        """
        result = StatusCheckResult(status_check=self)

        try:
            remaining = self.ssl_expiry_datetime() - datetime.datetime.utcnow()

            if remaining < datetime.timedelta(days=0):
                # ``remaining`` is negative here; report a positive day count
                # instead of e.g. "expired -3 days ago".
                raise Exception("Certificate expired %s days ago" %
                                abs(remaining).days)
            elif remaining < datetime.timedelta(days=self.days):
                raise Exception("Certificate expires in %s days" %
                                remaining.days)
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # multi-argument exceptions and missing on Python 3.
            result.error = u"{} {} {}".format(e, self.host, self.port)
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #5
0
    def _run(self):
        """Check that a PostgreSQL connection can be opened and closed.

        Connects with ``self.dbname``, ``self.dbuser``, ``self.dbpassword``,
        ``self.host`` and ``self.port``; any exception marks the check as
        failed.

        :return: the populated StatusCheckResult
        """
        result = StatusCheckResult(status_check=self)

        try:
            conn = psycopg2.connect(dbname=self.dbname,
                                    user=self.dbuser,
                                    password=self.dbpassword,
                                    host=self.host,
                                    port=self.port)
            conn.close()
        except Exception as e:
            # Format the exception itself (``e.message`` is missing on
            # Python 3) and pass it as a real 1-tuple so the %-formatting
            # cannot misinterpret the argument.
            result.error = u'Error occurred: %s' % (e, )
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #6
0
    def _run(self):
        """Run ``self._check()`` and wrap its outcome in a StatusCheckResult.

        On success the value returned by ``_check`` becomes the raw data. A
        StatusGoException contributes its ``raw_data`` and ``message``; any
        other exception is reported through its string form.

        :return: the populated StatusCheckResult
        """
        outcome = StatusCheckResult(status_check=self)

        try:
            payload = self._check()
        except StatusGoException as err:
            outcome.raw_data = err.raw_data
            outcome.error = u'Error occurred: {}'.format(err.message)
            outcome.succeeded = False
            return outcome
        except Exception as err:
            outcome.error = u'Error occurred: {}'.format(err)
            outcome.succeeded = False
            return outcome

        outcome.raw_data = payload
        outcome.succeeded = True
        return outcome
Example #7
0
    def _run(self):
        """Hold a short SMTP conversation and compare the last response code.

        The session connects to ``self.host:self.port``, sends EHLO with
        ``self.helo_address`` and, when configured, ``MAIL FROM:`` for
        ``self.sender`` followed by ``RCPT TO:`` for ``self.recipient``.

        The check succeeds only when at least one response code was recorded
        and the last one equals ``self.expected_code``. This holds even when
        a step raised, in which case ``result.error`` still describes the
        exception.

        :return: the populated StatusCheckResult
        """
        result = StatusCheckResult(status_check=self)

        sess = SmtpSession()
        try:
            try:
                sess.connect(self.host, self.port)
                sess.ehlo(self.helo_address)

                if self.sender:
                    sess.call('MAIL FROM:', self.sender)
                    # RCPT TO is only issued after a MAIL FROM.
                    if self.recipient:
                        sess.call('RCPT TO:', self.recipient)
            except Exception as e:
                # Only ``Exception`` is handled: KeyboardInterrupt and
                # SystemExit must propagate instead of being reported as a
                # failed check. The exception is formatted directly because
                # ``e.message`` is unavailable on Python 3.
                result.error = u'Error occurred %s: %s' % (
                    e.__class__.__name__,
                    e,
                )

            # Evaluate the verdict before quit() runs, so it is based on the
            # response codes recorded up to this point.
            codes = sess.response_codes
            result.succeeded = (len(codes) > 0 and
                                self.expected_code == codes[-1])
        finally:
            sess.quit()

        result.raw_data = "\n".join(sess.conversation)

        return result
 def _run(self):
     """Query the monitor and translate its HTTP response into a result.

     * 401 / 404: failed, the monitor is reported as not found.
     * 200: failed when the monitor details have ``isDown`` set, otherwise
       succeeded.
     * any other status code: succeeded, with the code recorded in
       ``raw_data`` and an 'Unexpected response!' note in ``error``.
     * any exception: failed, with the exception text as error and raw data.

     :return: the populated StatusCheckResult
     """
     result = StatusCheckResult(status_check=self)
     # Status codes reported as "monitor not found", mapped to their raw data.
     # NOTE(review): 401 shares the "probably deleted" wording with 404 -
     # confirm this is intended.
     notFoundRawData = {
         401: '401 UNAUTHORIZED',
         404: '404 NOT FOUND',
     }
     try:
         self.checkIfMonitorIdExists()
         monitorResponse = self.findMonitor()
         statusCode = monitorResponse.status_code

         if statusCode in notFoundRawData:
             result.error = u"Cant find monitor process {} with id: {}. Probably it was deleted.".format(self.monitor_name,self.monitor_id)
             result.succeeded = False
             result.raw_data = notFoundRawData[statusCode]
         elif statusCode == 200:
             monitorData = monitorResponse.json().get('monitorDetails')
             if monitorData.get('isDown'):
                 result.error = u"Monitor process {} is down! Please checkin using URL: {}".format(self.monitor_name,self.monitor_checkin)
                 result.succeeded = False
                 result.raw_data = self.buildRawData(monitorData)
             else:
                 result.succeeded = True
                 result.error = 'None'
                 result.raw_data = 'Monitor is alive!'
         else:
             # NOTE(review): an unexpected status code counts as success -
             # confirm this is intended.
             result.succeeded = True
             result.error = 'Unexpected response!'
             result.raw_data = u'Response code is: {}'.format(statusCode)
     except Exception as e:
         # Store readable text rather than the raw ``e.args`` tuple.
         message = u'{}'.format(e)
         result.error = message
         result.succeeded = False
         result.raw_data = message
     return result
Example #9
0
    def _run(self):
        """Compare recent CloudWatch datapoints against ``self.value``.

        Statistics for the ``namespace:metric`` pair in
        ``self.cloudwatch_metric`` are fetched for the last
        ``self.frequency`` minutes. The check fails when the client cannot be
        created, when no datapoints are returned, or when the
        ``self.statistic`` value of any datapoint satisfies
        ``<stat> <self.check_type> <self.value>``.

        :return: the populated StatusCheckResult
        :raises Exception: if ``self.check_type`` is not one of
            ``<``, ``<=``, ``>``, ``>=``, ``==``
        """
        result = StatusCheckResult(status_check=self)

        try:
            client = get_boto_client(self.cloudwatch_config)
        except Exception as e:
            result.succeeded = False
            result.error = u"Couldn't create cloudwatch client: {}".format(e)
            return result

        namespace, metric_name = self.cloudwatch_metric.split(":")
        # Derive both ends of the window from one clock reading so the window
        # is exactly ``self.frequency`` minutes long.
        end_time = datetime.now()
        start_time = end_time - timedelta(minutes=self.frequency)
        resp = client.get_metric_statistics(
            Namespace=namespace,
            MetricName=metric_name,
            Dimensions=self.parsed_dimensions(),
            StartTime=start_time,
            EndTime=end_time,
            Period=60,
            Statistics=['SampleCount', 'Average', 'Sum', 'Minimum', 'Maximum'],
        )

        datapoints = resp['Datapoints']
        if len(datapoints) == 0:
            result.succeeded = False
            result.error = u"No datapoints"
            return result

        stats = [dp[self.statistic] for dp in datapoints]

        # A datapoint is a failure when ``stat <check_type> threshold`` holds.
        comparators = {
            '<': lambda stat, limit: stat < limit,
            '<=': lambda stat, limit: stat <= limit,
            '>': lambda stat, limit: stat > limit,
            '>=': lambda stat, limit: stat >= limit,
            '==': lambda stat, limit: limit == stat,
        }
        if self.check_type not in comparators:
            raise Exception(u'Check type %s not supported' %
                            self.check_type)
        is_failure = comparators[self.check_type]
        is_equality = self.check_type == '=='

        # Convert the configured threshold once instead of per datapoint.
        threshold = float(self.value)
        # For '==' the threshold itself is reported, for the ordering
        # operators the offending statistic.
        failures = [
            threshold if is_equality else stat
            for stat in stats
            if is_failure(stat, threshold)
        ]

        if len(failures) > 0:
            result.succeeded = False
            result.error = u"{} {} {}".format(failures, self.check_type, self.value)
            return result

        result.succeeded = True
        return result
Example #10
0
def run_metrics_check(check):
    """
    Run the status check.

    The series returned by ``check.get_series()`` is evaluated against the
    high-alert threshold first and the warning threshold second. The first
    failure found determines the error message and ``check.importance``;
    tags are collected for every failing (importance, series) pair.

    :param check: the status check
    :return: a ``(StatusCheckResult, tags)`` tuple: the result carries
        success/failure/error information and ``tags`` is a list
    """
    # Get the series data. If there was an error, return immediately.
    series = check.get_series()

    # If there was an error fetching metrics, fail
    if series['error'] is True:
        message = series.get('error_message')
        # No exception is being handled here, so log with error() rather than
        # exception(), which would append an empty traceback.
        logger.error('Error fetching metrics: {}: {}'.format(
            series.get('error_code'), message))
        error = 'Error fetching metric from source: {}'.format(message)
        return StatusCheckResult(status_check=check,
                                 succeeded=False,
                                 error=error), [check.tag_fetch_error]

    # If the series is empty, apply the empty-series handler
    if series['data'] == []:
        if check.on_empty_series == defs.ON_EMPTY_SERIES_PASS:
            return StatusCheckResult(status_check=check,
                                     succeeded=True,
                                     error='SUCCESS: no data'), []
        if check.on_empty_series == defs.ON_EMPTY_SERIES_WARN:
            check.importance = Service.WARNING_STATUS
            tags = [check.tag_no_data]
            return StatusCheckResult(status_check=check,
                                     succeeded=False,
                                     error='WARNING: no data'), tags
        if check.on_empty_series == defs.ON_EMPTY_SERIES_FAIL:
            check.importance = check.high_alert_importance
            tags = [check.tag_no_data]
            return StatusCheckResult(status_check=check,
                                     succeeded=False,
                                     error='{}: no data'.format(
                                         check.importance)), tags
        # Any other on_empty_series value falls through to the normal
        # processing below.

    # Ignore all points at or before the following start time
    start_time = time.time() - check.time_range * 60

    def is_recent(point):
        # The first element of a point is compared against start_time as its
        # timestamp.
        timestamp = point[0]
        if timestamp <= start_time:
            logger.debug('Ignoring point {} older than {}'.format(
                str(point), str(start_time)))
            return False
        return True

    parsed_series = series['data']
    logger.info('Processing series {}'.format(str(parsed_series)))

    # order is important - most severe first, since we report the first error found
    thresholds = [
        (check.high_alert_importance, check.high_alert_value),
        (Service.WARNING_STATUS, check.warning_value),
    ]

    # Process each series, updating result and tags as we go
    result = StatusCheckResult(status_check=check, succeeded=True)
    result.raw_data = _get_raw_data_with_thresholds(check, series)
    tags = []

    # loop order is:
    #   (high_importance, series_1), (high_importance, series_2), ...,
    #   (warning, series_1), (warning, series_2), ...
    # and we report the first error encountered as our error
    # (but continue looping so we accumulate tags)
    for importance, threshold in thresholds:
        for series_data in parsed_series:
            series_name = series_data['series']
            datapoints = [
                point for point in series_data['datapoints']
                if is_recent(point)
            ]
            failing_point = _point_triggering_alert(datapoints,
                                                    check.check_type,
                                                    check.consecutive_failures,
                                                    threshold)
            if failing_point is not None:
                tags.append(check.tag_failing(importance, series_name))
                if result.succeeded:
                    # record the first, most severe failure
                    result.succeeded = False
                    check.importance = importance
                    result.error = _get_error_message(check, threshold,
                                                      importance, series_name,
                                                      failing_point[1])

            logger.info('Finished processing series {}'.format(series_name))

    return result, tags
Example #11
0
    def _run(self):
        """Evaluate the parsed metric series against ``self.value``.

        The check fails when ``parse_metric()`` reports an error, when no
        series contains data, when more than ``self.allowed_num_failures``
        series breach the threshold, or when fewer than
        ``self.expected_num_hosts`` series contain data.

        For ``<`` / ``<=`` the minimum of a series is compared, for ``>`` /
        ``>=`` its maximum, and for ``==`` the threshold is looked up among
        the series values.

        :return: the populated StatusCheckResult
        :raises Exception: if ``self.check_type`` is not supported
        """
        if not hasattr(self, 'utcnow'):
            self.utcnow = None

        result = StatusCheckResult(status_check=self)
        # NOTE: Can be added later
        # last_result = self.last_result()
        #
        # if last_result:
        #     last_result_started = last_result.time
        #     time_to_check = max(self.frequency, ((timezone.now() - last_result_started).total_seconds() / 60) + 1)
        # else:
        #     time_to_check = self.frequency

        output = self.parse_metric()
        result.raw_data = output["raw"]

        # Bail out early when the metric could not be fetched or is empty
        if output["error"]:
            result.error = output["error"]
            result.succeeded = False
            return result

        if not output["num_series_with_data"]:
            result.error = "Empty result for given metric"
            result.succeeded = False
            return result

        result.average_value = output['average_value']

        failures = []
        for s in output['series']:
            if not s["values"]:
                continue
            failure_value = None
            if self.check_type == '<':
                if float(s["min"]) < float(self.value):
                    failure_value = s["min"]
            elif self.check_type == '<=':
                if float(s["min"]) <= float(self.value):
                    failure_value = s["min"]
            elif self.check_type == '>':
                if float(s["max"]) > float(self.value):
                    failure_value = s["max"]
            elif self.check_type == '>=':
                if float(s["max"]) >= float(self.value):
                    failure_value = s["max"]
            elif self.check_type == '==':
                if float(self.value) in s['values']:
                    failure_value = float(self.value)
            else:
                raise Exception(u'Check type %s not supported' %
                                self.check_type)

            # Compare against None explicitly: a failing value of 0 is falsy
            # and would otherwise be silently dropped.
            if failure_value is not None:
                failures.append(failure_value)

        if len(failures) > self.allowed_num_failures:
            result.succeeded = False
        elif output['num_series_with_data'] < self.expected_num_hosts:
            result.succeeded = False
        else:
            result.succeeded = True

        if not result.succeeded:
            result.error = self.format_error_message(
                failures,
                output['num_series_with_data']
            )

        return result