Example #1
0
    def test_infinite_cleanup_loop(self, mocked_apply_async):
        """
        Check that clean_db only re-queues itself after cleaning a full batch.

        There is a potential for the cleanup task to constantly call itself
        if every time it re-runs there is at least 1 new object to clean up
        (i.e. every 3 seconds for 60 days a new result is recorded). Make sure
        it only re-calls itself if the whole batch is used.

        :param mocked_apply_async: mock whose ``called`` flag records whether
            the task queued itself again (presumably injected by a patch
            decorator outside this view -- confirm)
        """
        def save_stale_result():
            # 61 days old, i.e. past the 60-day window described above.
            stale_time = timezone.now() - timedelta(days=61)
            StatusCheckResult(status_check=self.graphite_check,
                              time=stale_time,
                              time_complete=stale_time,
                              succeeded=False).save()

        with self.settings(CELERY_ALWAYS_EAGER=False):
            for _ in range(2):
                save_stale_result()

            tasks.clean_db(batch_size=2)
            # If full batch is cleaned it should queue itself again
            self.assertTrue(mocked_apply_async.called)

            save_stale_result()

            mocked_apply_async.reset_mock()
            tasks.clean_db(batch_size=2)
            # This time full batch isn't cleaned (only 1 out of 2) - don't call again
            self.assertFalse(mocked_apply_async.called)
Example #2
0
    def create_dummy_data(self):
        """
        Create the fixture objects shared by the tests.

        Sets up a user holding several "add" permissions, one Graphite, one
        Jenkins and one HTTP check attached to a single service, and two
        results for the Graphite check: an older failure followed by a more
        recent pass.
        """
        self.username = '******'
        self.password = '******'
        self.user = User.objects.create(username=self.username)
        self.user.set_password(self.password)

        granted_codenames = (
            'add_instance',
            'add_service',
            'add_httpstatuscheck',
            'add_graphitestatuscheck',
            'add_jenkinsstatuscheck',
            'add_icmpstatuscheck',
        )
        self.user.user_permissions.add(*[
            Permission.objects.get(codename=codename)
            for codename in granted_codenames
        ])
        self.user.save()

        self.graphite_check = GraphiteStatusCheck.objects.create(
            name='Graphite Check',
            metric='stats.fake.value',
            check_type='>',
            value='9.0',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
        )
        self.jenkins_check = JenkinsStatusCheck.objects.create(
            name='Jenkins Check',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            max_queued_build_time=10,
        )
        self.http_check = HttpStatusCheck.objects.create(
            name='Http Check',
            created_by=self.user,
            importance=Service.CRITICAL_STATUS,
            endpoint='http://arachnys.com',
            timeout=10,
            status_code='200',
            text_match=None,
        )

        self.service = Service.objects.create(name='Service')
        attached_checks = [
            self.graphite_check,
            self.jenkins_check,
            self.http_check,
        ]
        self.service.status_checks.add(*attached_checks)

        # The failing result is the second most recent one.
        failing_fields = {
            'status_check': self.graphite_check,
            'time': timezone.now() - timedelta(seconds=60),
            'time_complete': timezone.now() - timedelta(seconds=59),
            'succeeded': False,
        }
        self.older_result = StatusCheckResult(**failing_fields)
        self.older_result.save()

        # The passing result is the most recent one.
        passing_fields = {
            'status_check': self.graphite_check,
            'time': timezone.now() - timedelta(seconds=1),
            'time_complete': timezone.now(),
            'succeeded': True,
        }
        self.most_recent_result = StatusCheckResult(**passing_fields)
        self.most_recent_result.save()

        self.graphite_check.save()  # Will recalculate status
Example #3
0
    def test_match_all_in(self):
        """
        Exercise Acknowledgement.matches_result with MATCH_ALL_IN.

        The acknowledgement lists two tags. A result is expected to match
        while all of its tags are among those two, and to stop matching once
        it carries a tag the acknowledgement does not list.
        """
        tag_objs = []
        for index in range(3):
            tag_obj, _created = StatusCheckResultTag.objects.get_or_create(
                value='tag' + str(index))
            tag_objs.append(tag_obj)
        first_tag, second_tag, unlisted_tag = tag_objs

        ack = Acknowledgement(match_if=Acknowledgement.MATCH_ALL_IN,
                              status_check=self.http_check)
        ack.save()
        ack.tags.add(first_tag, second_tag)

        stamp = timezone.now()
        failed_result = StatusCheckResult(status_check=self.http_check,
                                          time=stamp,
                                          time_complete=stamp,
                                          succeeded=False)
        failed_result.save()

        # A result without any tags matches.
        self.assertTrue(ack.matches_result(failed_result))

        # A result whose only tag is listed on the acknowledgement matches.
        failed_result.tags.add(first_tag)
        self.assertTrue(ack.matches_result(failed_result))

        # One listed tag plus one unlisted tag must NOT match.
        failed_result.tags.add(unlisted_tag)
        self.assertFalse(ack.matches_result(failed_result))
Example #4
0
    def run_checks(self, checks, from_service_status=None):
        # type: (List[Tuple[StatusCheck, bool, bool]], Union[None, str]) -> None
        """
        Fake a run of the given checks and then refresh the service status.

        Every existing StatusCheckResult is deleted first. One new result is
        stored per entry in ``checks`` (the same check may appear several
        times), after which ``self.service.update_status()`` is called.
        Configure self.service.alerts before calling this.
        :param checks: list of (check, succeeded, acked) tuples
        :param from_service_status: when truthy, assigned to
            self.service.overall_status before the status update, i.e. the
            status the service transitions from
        """
        # Start from a clean slate.
        StatusCheckResult.objects.all().delete()

        # Only set ``acked`` when the model has such an attribute
        # (forwards-compatible with acks).
        supports_acks = hasattr(StatusCheckResult, 'acked')

        for status_check, passed, was_acked in checks:
            ran_at = timezone.now()
            outcome = StatusCheckResult(
                status_check=status_check,
                time=ran_at,
                time_complete=ran_at,
                succeeded=passed,
            )
            if supports_acks:
                outcome.acked = was_acked
            outcome.save()

            status_check.last_run = ran_at
            status_check.save()

        if from_service_status:
            self.service.overall_status = from_service_status

        self.service.update_status()
Example #5
0
    def create_dummy_data(self):
        """
        Build the objects the tests rely on.

        Creates a user with several "add" permissions, a Graphite, a Jenkins
        and an HTTP check linked to one service, and two results for the
        Graphite check (an older failing one and a newer passing one).
        """
        self.username = '******'
        self.password = '******'
        self.user = User.objects.create(username=self.username)
        self.user.set_password(self.password)

        permission_codenames = (
            'add_instance',
            'add_service',
            'add_httpstatuscheck',
            'add_graphitestatuscheck',
            'add_jenkinsstatuscheck',
            'add_icmpstatuscheck',
        )
        self.user.user_permissions.add(*[
            Permission.objects.get(codename=codename)
            for codename in permission_codenames
        ])
        self.user.save()

        self.graphite_check = GraphiteStatusCheck.objects.create(
            name='Graphite Check',
            metric='stats.fake.value',
            check_type='>',
            value='9.0',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
        )
        self.jenkins_check = JenkinsStatusCheck.objects.create(
            name='Jenkins Check',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            max_queued_build_time=10,
        )
        self.http_check = HttpStatusCheck.objects.create(
            name='Http Check',
            created_by=self.user,
            importance=Service.CRITICAL_STATUS,
            endpoint='http://arachnys.com',
            timeout=10,
            status_code='200',
            text_match=None,
        )

        self.service = Service.objects.create(name='Service')
        service_checks = [
            self.graphite_check,
            self.jenkins_check,
            self.http_check,
        ]
        self.service.status_checks.add(*service_checks)

        # The failing result is the second most recent one.
        older_fields = {
            'check': self.graphite_check,
            'time': timezone.now() - timedelta(seconds=60),
            'time_complete': timezone.now() - timedelta(seconds=59),
            'succeeded': False,
        }
        self.older_result = StatusCheckResult(**older_fields)
        self.older_result.save()

        # The passing result is the most recent one.
        newest_fields = {
            'check': self.graphite_check,
            'time': timezone.now() - timedelta(seconds=1),
            'time_complete': timezone.now(),
            'succeeded': True,
        }
        self.most_recent_result = StatusCheckResult(**newest_fields)
        self.most_recent_result.save()

        self.graphite_check.save()  # Will recalculate status
Example #6
0
    def create_dummy_data(self):
        """
        Create the plugin-based fixtures used by the tests.

        Sets up a user, a service, the ``port_open_check`` and
        ``chat_messenger_alert`` plugin models, two port-open checks attached
        to the service, and two results for the first check (an older failure
        and a newer pass). The stored values of the first check are asserted
        along the way.
        """
        self.username = '******'
        self.password = '******'
        self.user = User.objects.create(username=self.username)
        self.user.set_password(self.password)
        self.user.user_permissions.add(*[
            Permission.objects.get(codename=codename)
            for codename in ('add_instance', 'add_service')
        ])
        self.user.save()
        self.service = Service.objects.create(name='Service')

        self.port_open_check_model = StatusCheckPluginModel.objects.create(
            slug='port_open_check')
        self.chat_messenger_alert_model = AlertPluginModel.objects.create(
            slug='chat_messenger_alert')
        # Refetch User model with new chat messenger settings.
        self.user = User.objects.get(username='******')
        self.user.chat_messenger_alert_settings.nickname = "Xx__CabotMaster420__xX"

        self.port_open_check = StatusCheck.objects.create(
            name='Port Open Check for Service',
            check_plugin=StatusCheckPluginModel.objects.get(slug='port_open_check'),
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            port=123,
            address='ports.arachnys.com',
        )
        # Only one StatusCheck exists at this point, so a bare get() works.
        self.assertEqual(self.port_open_check.get_variable('port'), 123)
        self.assertEqual(StatusCheck.objects.get().port, 123)
        self.assertEqual(StatusCheck.objects.get().address, 'ports.arachnys.com')

        self.port_open_check_2 = StatusCheck.objects.create(
            name='Port Open Check for Service 2',
            check_plugin=StatusCheckPluginModel.objects.get(slug='port_open_check'),
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            port=456,
            address='ports.arachnys.com',
        )

        attached_checks = [self.port_open_check, self.port_open_check_2]
        self.service.status_checks.add(*attached_checks)

        # The failing result is the second most recent one.
        failing_fields = {
            'status_check': self.port_open_check,
            'time': timezone.now() - timedelta(seconds=60),
            'time_complete': timezone.now() - timedelta(seconds=59),
            'succeeded': False,
        }
        self.older_result = StatusCheckResult(**failing_fields)
        self.older_result.save()

        # The passing result is the most recent one.
        passing_fields = {
            'status_check': self.port_open_check,
            'time': timezone.now() - timedelta(seconds=1),
            'time_complete': timezone.now(),
            'succeeded': True,
        }
        self.most_recent_result = StatusCheckResult(**passing_fields)
        self.most_recent_result.save()

        self.port_open_check.save()  # Will recalculate status
Example #7
0
class LocalTestCase(TestCase):
    """Base test case that stubs outbound integrations and creates fixtures."""

    def setUp(self):
        """
        Replace network-facing callables with mocks, then create fixtures.

        ``requests.get``, ``requests.post``, ``rest.TwilioRestClient`` and
        ``mail.send_mail`` are swapped for ``Mock`` objects. A cleanup is
        registered for each of them so the previous attribute is put back
        once the test has finished and the stubs do not leak into tests that
        run later in the same process.
        """
        stubbed_attributes = (
            (requests, 'get'),
            (requests, 'post'),
            (rest, 'TwilioRestClient'),
            (mail, 'send_mail'),
        )
        for owner, attribute in stubbed_attributes:
            # Register the restore step before patching so it always runs.
            try:
                original = getattr(owner, attribute)
            except AttributeError:
                # Nothing to restore: just remove the stub afterwards.
                self.addCleanup(delattr, owner, attribute)
            else:
                self.addCleanup(setattr, owner, attribute, original)
            setattr(owner, attribute, Mock())

        self.create_dummy_data()
        super(LocalTestCase, self).setUp()

    def create_dummy_data(self):
        """
        Create the fixture objects shared by the tests.

        Sets up a user, one Graphite, one Jenkins and one HTTP check attached
        to a single service, and two results for the Graphite check: a
        passing one that is the most recent and an older failing one.
        """
        self.username = '******'
        self.password = '******'
        self.user = User.objects.create(username=self.username)
        self.user.set_password(self.password)
        self.user.save()
        self.graphite_check = GraphiteStatusCheck.objects.create(
            name='Graphite Check',
            metric='stats.fake.value',
            check_type='>',
            value='9.0',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
        )
        self.jenkins_check = JenkinsStatusCheck.objects.create(
            name='Jenkins Check',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            max_queued_build_time=10,
        )
        self.http_check = HttpStatusCheck.objects.create(
            name='Http Check',
            created_by=self.user,
            importance=Service.CRITICAL_STATUS,
            endpoint='http://arachnys.com',
            timeout=10,
            status_code='200',
            text_match=None,
        )
        self.service = Service.objects.create(name='Service', )

        self.service.status_checks.add(self.graphite_check, self.jenkins_check,
                                       self.http_check)
        # Passing is most recent
        self.most_recent_result = StatusCheckResult(
            check=self.graphite_check,
            time=timezone.now() - timedelta(seconds=1),
            time_complete=timezone.now(),
            succeeded=True)
        self.most_recent_result.save()
        # failing is second most recent
        self.older_result = StatusCheckResult(
            check=self.graphite_check,
            time=timezone.now() - timedelta(seconds=60),
            time_complete=timezone.now() - timedelta(seconds=59),
            succeeded=False)
        self.older_result.save()
        self.graphite_check.save()  # Will recalculate status
Example #8
0
    def _run(self):
        """
        Try to open a TCP connection to ``self.host``:``self.port``.

        :return: a StatusCheckResult (not saved here). ``succeeded`` is True
            when the connection could be opened and shut down, otherwise
            False with ``error`` describing the exception.
        """
        result = StatusCheckResult(status_check=self)

        try:
            s = socket.create_connection((self.host, self.port), self.timeout)
            try:
                s.shutdown(socket.SHUT_RDWR)
            finally:
                # Release the descriptor even when shutdown() raises.
                s.close()
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # multi-argument errors such as socket.error and does not exist
            # at all on Python 3.
            result.error = u'Error occurred: %s' % (e, )
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #9
0
    def _run(self):
        """
        Ping ``self.host`` and judge the result by packet loss and RTT.

        ``ping`` is invoked with ``self.packet_size``, ``self.count`` and
        ``self.timeout`` passed through the ``-s``, ``-c`` and ``-W`` options.
        Its output is stored in ``result.raw_data`` and handed to
        ``self._parse_output``, which is expected to return a mapping with
        ``'packet_loss'`` and ``'rtt'['avg']`` entries.

        :return: a StatusCheckResult (not saved here). The check fails when
            ping exits with an error, when any packet loss is reported, or
            when ``self.max_rtt`` is set and the average RTT exceeds it.
        """
        result = StatusCheckResult(status_check=self)

        args = [
            'ping', '-s',
            str(self.packet_size), '-c',
            str(self.count), '-W',
            str(self.timeout), self.host
        ]

        try:
            result.raw_data = subprocess.check_output(args,
                                                      stderr=subprocess.STDOUT,
                                                      shell=False)
            r = self._parse_output(result.raw_data)
            if r['packet_loss'] > 0.0:
                raise Exception("%0.1f%% packet loss" % r['packet_loss'])
            elif self.max_rtt and r['rtt']['avg'] > self.max_rtt:
                raise Exception("Maximum average RTT reached: %s" %
                                r['rtt']['avg'])
        except subprocess.CalledProcessError as e:
            # Non-zero exit status: report whatever ping printed.
            result.succeeded = False
            result.error = e.output
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # multi-argument errors (e.g. OSError when ping is missing) and
            # does not exist on Python 3.
            result.error = u"{}, Host: {}".format(e, self.host)
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #10
0
 def trigger_failing_check(self, check):
     """Store a failed result for ``check`` and stamp the check as just run."""
     failed_result = StatusCheckResult(
         status_check=check,
         time=timezone.now() - timedelta(seconds=60),
         time_complete=timezone.now() - timedelta(seconds=59),
         succeeded=False,
     )
     failed_result.save()

     check.last_run = timezone.now()
     check.save()
Example #11
0
    def test_cleanup_simple(self):
        """
        clean_db should remove a 61-day-old result and a 61-day-old snapshot.

        One stale snapshot and one stale result are added; after the cleanup
        both tables are expected to be back at their initial row counts.
        """
        results_before = StatusCheckResult.objects.all().count()
        snapshots_before = ServiceStatusSnapshot.objects.all().count()

        stale_snapshot = ServiceStatusSnapshot(
            service=self.service,
            num_checks_active=1,
            num_checks_passing=1,
            num_checks_failing=1,
            overall_status=self.service.overall_status,
            time=timezone.now() - timedelta(days=61),
        )
        stale_snapshot.save()

        stale_result = StatusCheckResult(
            status_check=self.graphite_check,
            time=timezone.now() - timedelta(days=61),
            time_complete=timezone.now() - timedelta(days=61),
            succeeded=False,
        )
        stale_result.save()

        # The new result is present until the cleanup runs.
        results_with_stale = StatusCheckResult.objects.all().count()
        self.assertEqual(results_with_stale, results_before + 1)

        tasks.clean_db()

        results_after = StatusCheckResult.objects.all().count()
        self.assertEqual(results_after, results_before)
        snapshots_after = ServiceStatusSnapshot.objects.all().count()
        self.assertEqual(snapshots_after, snapshots_before)
Example #12
0
    def _run(self):
        """
        Fail when the certificate has expired or expires within ``self.days``.

        The remaining lifetime is ``self.ssl_expiry_datetime()`` minus
        ``datetime.datetime.utcnow()`` (so the expiry is presumably a naive
        UTC datetime -- confirm).

        :return: a StatusCheckResult (not saved here); on failure ``error``
            holds the reason followed by ``self.host`` and ``self.port``.
        """
        result = StatusCheckResult(status_check=self)

        try:
            remaining = self.ssl_expiry_datetime() - datetime.datetime.utcnow()

            if remaining < datetime.timedelta(days=0):
                # NOTE(review): remaining.days is negative in this branch, so
                # the message reads e.g. "expired -2 days ago".
                raise Exception("Certificate expired %s days ago" % remaining.days)
            elif remaining < datetime.timedelta(days=self.days):
                raise Exception("Certificate expires in %s days" % remaining.days)
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # multi-argument errors (e.g. socket errors raised while fetching
            # the certificate) and does not exist on Python 3.
            result.error = u"{} {} {}".format(e, self.host, self.port)
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #13
0
    def _run(self):
        """
        Check that a PostgreSQL connection can be opened.

        Connects with ``self.dbname``, ``self.dbuser``, ``self.dbpassword``,
        ``self.host`` and ``self.port`` and closes the connection again.

        :return: a StatusCheckResult (not saved here); ``succeeded`` is False
            and ``error`` is set when connecting or closing raises.
        """
        result = StatusCheckResult(status_check=self)

        try:
            conn = psycopg2.connect(dbname=self.dbname,
                                    user=self.dbuser,
                                    password=self.dbpassword,
                                    host=self.host,
                                    port=self.port)
            conn.close()
        except Exception as e:
            # Format the exception itself (``e.message`` does not exist on
            # Python 3) and pass a real one-element tuple to ``%``.
            result.error = u'Error occurred: %s' % (e, )
            result.succeeded = False
        else:
            result.succeeded = True

        return result
Example #14
0
    def _run(self):
        """
        Run ``self._check()`` and wrap its outcome in a StatusCheckResult.

        On success the value returned by ``_check`` is stored as ``raw_data``.
        A StatusGoException contributes its own ``raw_data`` and ``message``
        to the failed result; any other exception is reported through its
        string form.

        :return: the populated StatusCheckResult (not saved here)
        """
        outcome = StatusCheckResult(status_check=self)

        try:
            payload = self._check()
        except StatusGoException as exc:
            outcome.raw_data = exc.raw_data
            outcome.error = u'Error occurred: {}'.format(exc.message)
            outcome.succeeded = False
            return outcome
        except Exception as exc:
            outcome.error = u'Error occurred: {}'.format(exc)
            outcome.succeeded = False
            return outcome

        # No exception: keep what the check returned.
        outcome.raw_data = payload
        outcome.succeeded = True
        return outcome
Example #15
0
    def _run(self):
        """
        Talk to an SMTP server and compare its last response code.

        Connects to ``self.host``:``self.port``, sends EHLO with
        ``self.helo_address``, then ``MAIL FROM:`` when ``self.sender`` is set
        and ``RCPT TO:`` when both sender and recipient are set. The check
        succeeds when the last recorded response code equals
        ``self.expected_code`` -- also when the conversation ended in an
        exception, so an expected rejection can count as success.

        :return: a StatusCheckResult (not saved here) whose ``raw_data`` is
            the recorded conversation joined with newlines.
        """
        result = StatusCheckResult(status_check=self)

        sess = SmtpSession()

        def last_code_matches():
            # Evaluated before ``sess.quit()`` so that the QUIT exchange
            # cannot influence which response code is the last one.
            codes = sess.response_codes
            return len(codes) > 0 and self.expected_code == codes[-1]

        try:
            sess.connect(self.host, self.port)
            sess.ehlo(self.helo_address)

            if self.sender:
                sess.call('MAIL FROM:', self.sender)

            if self.sender and self.recipient:
                sess.call('RCPT TO:', self.recipient)

        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # multi-argument errors such as socket.error and does not exist
            # on Python 3.
            result.error = u'Error occurred %s: %s' % (
                e.__class__.__name__,
                e,
            )
            result.succeeded = last_code_matches()
        except:
            # Kept from the original: anything that is not an ``Exception``
            # (e.g. KeyboardInterrupt) is recorded as a failure, not re-raised.
            result.error = u'Error occurred: %s' % (sys.exc_info()[0], )
            result.succeeded = False
        else:
            result.succeeded = last_code_matches()
        finally:
            sess.quit()

        result.raw_data = "\n".join(sess.conversation)

        return result
Example #16
0
    def test_cleanup_batch(self):
        """
        clean_db with batch_size=1 should still remove both stale results.

        Two 61-day-old results are added; after the cleanup the result count
        is expected to be back at its initial value.
        """
        count_before = StatusCheckResult.objects.all().count()

        for _ in range(2):
            stale_result = StatusCheckResult(
                status_check=self.graphite_check,
                time=timezone.now() - timedelta(days=61),
                time_complete=timezone.now() - timedelta(days=61),
                succeeded=False,
            )
            stale_result.save()

        count_with_stale = StatusCheckResult.objects.all().count()
        self.assertEqual(count_with_stale, count_before + 2)

        tasks.clean_db(batch_size=1)

        count_after = StatusCheckResult.objects.all().count()
        self.assertEqual(count_after, count_before)
Example #17
0
    def _run(self):
        """
        Connect to ``self.host``:``self.port`` and optionally probe the reply.

        When ``self.message_to_send`` is set it is sent after connecting
        (base64-decoded first if ``self.message_to_send_b64`` is true). When
        ``self.expected_reply`` is set (base64-decoded first if
        ``self.expected_reply_b64`` is true), as many bytes as the expected
        reply is long are read and compared with it.

        :return: a StatusCheckResult (not saved here). ``succeeded`` is False
            with ``error`` set when connecting, sending or receiving raises,
            or when the received bytes differ from the expected reply.
        """
        result = StatusCheckResult(status_check=self)

        # Stays None when the connection attempt fails, so that the cleanup
        # below does not touch a socket that was never created.
        s = None
        try:
            s = socket.create_connection((self.host, self.port), self.timeout)

            # The connection was successful; refine the check by verifying
            # the other success criteria (if any). Decoded payloads are kept
            # in locals so that the attributes on ``self`` are not
            # overwritten and decoded again on a later run.
            message = self.message_to_send
            if message:
                if self.message_to_send_b64:
                    message = message.decode('base64')
                s.sendall(message)

            expected = self.expected_reply
            if expected:
                if self.expected_reply_b64:
                    expected = expected.decode('base64')

                # Only read as many bytes as the expected response is long.
                # A reply can be long, and when only its beginning matters
                # (e.g. `HTTP/1.1 200 OK`) the remainder is ignored.
                # recv() may return fewer bytes than requested, so keep
                # reading until enough arrived or the peer closed.
                chunks = []
                missing = len(expected)
                while missing > 0:
                    chunk = s.recv(missing)
                    if not chunk:
                        break
                    chunks.append(chunk)
                    missing -= len(chunk)
                received_response = b''.join(chunks)

                if received_response == expected:
                    result.succeeded = True
                else:
                    result.error = u'Got unexpected response %r' % (
                        received_response, )
                    result.succeeded = False
            else:
                result.succeeded = True
        except Exception as e:
            # Format the exception itself: ``e.message`` is empty for
            # multi-argument errors such as socket.error and does not exist
            # on Python 3.
            result.error = u'Error occurred: %s' % (e, )
            result.succeeded = False
        finally:
            if s is not None:
                try:
                    s.shutdown(socket.SHUT_RDWR)
                except socket.error:
                    # Best effort: the peer may already have closed the
                    # connection, which must not change the verdict.
                    pass
                s.close()

        return result
Example #18
0
    def test_print_tags(self):
        """
        print_tags should list a result's tag values one per line.

        Starts from empty result and tag tables, attaches ten tags
        (``tag000`` .. ``tag009``) to a single failed result and compares the
        output of ``print_tags()`` with the newline-joined values.
        """
        StatusCheckResult.objects.all().delete()
        StatusCheckResultTag.objects.all().delete()

        stamp = timezone.now()
        failed_result = StatusCheckResult(
            status_check=self.http_check,
            time=stamp,
            time_complete=stamp,
            succeeded=False,
        )
        failed_result.save()

        new_tags = []
        for number in range(10):
            new_tags.append(StatusCheckResultTag(value='tag{:03}'.format(number)))
        StatusCheckResultTag.objects.bulk_create(new_tags)

        # The tables were emptied above, so the stored tags are exactly the
        # ten that were just created.
        for stored_tag in StatusCheckResultTag.objects.all():
            failed_result.tags.add(stored_tag)

        self.assertEqual(
            failed_result.print_tags(),
            'tag000\ntag001\ntag002\ntag003\ntag004\ntag005\ntag006\ntag007\ntag008'
            '\ntag009')
Example #19
0
    def test_clean_orphaned_tags(self):
        """
        clean_orphaned_tags should delete tags no result refers to.

        100 results and 100 tags are created and 50 tags are attached to 50
        results. After the first cleanup 50 tags are expected to remain;
        after deleting every result and cleaning again, none.
        """
        StatusCheckResult.objects.all().delete()
        StatusCheckResultTag.objects.all().delete()

        stamp = timezone.now()
        StatusCheckResult.objects.bulk_create([
            StatusCheckResult(status_check=self.http_check,
                              time=stamp,
                              time_complete=stamp,
                              succeeded=False)
            for _ in range(100)
        ])
        StatusCheckResultTag.objects.bulk_create([
            StatusCheckResultTag(value='tag{:03}'.format(number))
            for number in range(100)
        ])

        stored_results = StatusCheckResult.objects.filter(
            status_check=self.http_check)
        stored_tags = StatusCheckResultTag.objects.all()

        # add tags 0-49 to first 50 results
        pairs = zip(stored_results[:50], stored_tags[:50])
        for stored_result, stored_tag in pairs:
            stored_result.tags.add(stored_tag)

        # tags 50-99 should get cleaned up here
        tasks.clean_orphaned_tags()

        remaining = StatusCheckResultTag.objects.order_by('value')
        self.assertEqual(len(remaining), 50)  # 50 left
        expected_values = [u'tag{:03}'.format(number) for number in range(50)]
        self.assertEqual(list(remaining.values_list('value', flat=True)),
                         expected_values)

        # Once the results are gone, every tag is orphaned and should be
        # cleaned up as well.
        StatusCheckResult.objects.all().delete()
        tasks.clean_orphaned_tags()

        remaining = StatusCheckResultTag.objects.order_by('value')
        self.assertEqual(len(remaining), 0)
Example #20
0
def run_metrics_check(check):
    """
    Run the status check.

    The series returned by ``check.get_series()`` is compared against the
    check's high-alert and warning thresholds. ``check.importance`` is
    updated as a side effect to the importance of the reported failure.

    :param check: the status check
    :return: a ``(result, tags)`` tuple: a StatusCheckResult (not saved here)
        containing success/failure/error information, and the list of tags
        collected while evaluating the series
    """
    # Get the series data. If there was an error, return immediately.
    series = check.get_series()

    # If there was an error fetching metrics, fail
    if series['error'] is True:
        message = series.get('error_message')
        # No exception is being handled here, so log a plain error rather
        # than logger.exception(), which would append an empty traceback.
        logger.error('Error fetching metrics: {}: {}'.format(
            series.get('error_code'), message))
        error = 'Error fetching metric from source: {}'.format(message)
        return StatusCheckResult(status_check=check,
                                 succeeded=False,
                                 error=error), [check.tag_fetch_error]

    # If the series is empty, apply the empty-series handler
    if series['data'] == []:
        if check.on_empty_series == defs.ON_EMPTY_SERIES_PASS:
            return StatusCheckResult(status_check=check,
                                     succeeded=True,
                                     error='SUCCESS: no data'), []
        if check.on_empty_series == defs.ON_EMPTY_SERIES_WARN:
            check.importance = Service.WARNING_STATUS
            tags = [check.tag_no_data]
            return StatusCheckResult(status_check=check,
                                     succeeded=False,
                                     error='WARNING: no data'), tags
        if check.on_empty_series == defs.ON_EMPTY_SERIES_FAIL:
            check.importance = check.high_alert_importance
            tags = [check.tag_no_data]
            return StatusCheckResult(status_check=check,
                                     succeeded=False,
                                     error='{}: no data'.format(
                                         check.importance)), tags
        # Any other on_empty_series value falls through to the threshold
        # evaluation below, which then finds no series to inspect.

    # Ignore all points at or before the following start time
    # (check.time_range is multiplied by 60, i.e. presumably minutes).
    start_time = time.time() - check.time_range * 60

    def filter_old_points(p):
        # p[0] is the point's timestamp; keep only points after start_time.
        timestamp = p[0]
        if timestamp <= start_time:
            logger.debug('Ignoring point {} older than {}'.format(
                str(p), str(start_time)))
            return False
        return True

    parsed_series = series['data']
    logger.info('Processing series {}'.format(str(parsed_series)))

    # order is important - most severe first, since we report the first error found
    thresholds = [
        (check.high_alert_importance, check.high_alert_value),
        (Service.WARNING_STATUS, check.warning_value),
    ]

    # Process each series, updating result and tags as we go
    result = StatusCheckResult(status_check=check, succeeded=True)
    result.raw_data = _get_raw_data_with_thresholds(check, series)
    tags = []

    # loop order is:
    #   (high_importance, series_1), (high_importance, series_2), ...,
    #   (warning, series_1), (warning, series_2), ...
    # and we report the first error encountered as our error
    # (but continue looping so we accumulate tags)
    for importance, threshold in thresholds:
        for series_data in parsed_series:
            series_name = series_data['series']
            datapoints = list(
                filter(filter_old_points, series_data['datapoints']))
            failing_point = _point_triggering_alert(datapoints,
                                                    check.check_type,
                                                    check.consecutive_failures,
                                                    threshold)
            if failing_point is not None:
                tags.append(check.tag_failing(importance, series_name))
                if result.succeeded:
                    # record the first, most severe failure
                    result.succeeded = False
                    check.importance = importance
                    result.error = _get_error_message(check, threshold,
                                                      importance, series_name,
                                                      failing_point[1])

            logger.info('Finished processing series {}'.format(series_name))

    return result, tags
Example #21
0
    def _run(self):
        """
        Evaluate the metric returned by ``self.parse_metric()``.

        For every series that has values, the series minimum (for ``<`` and
        ``<=``), its maximum (for ``>`` and ``>=``) or its value list (for
        ``==``) is compared with ``self.value``. The check fails when more
        series fail than ``self.allowed_num_failures`` permits, or when fewer
        series carry data than ``self.expected_num_hosts``.

        :return: a StatusCheckResult (not saved here); on failure ``error``
            is built by ``self.format_error_message``
        :raises Exception: when ``self.check_type`` is not a supported operator
        """
        if not hasattr(self, 'utcnow'):
            self.utcnow = None

        result = StatusCheckResult(status_check=self)
        # NOTE: Can be added later
        # last_result = self.last_result()
        #
        # if last_result:
        #     last_result_started = last_result.time
        #     time_to_check = max(self.frequency, ((timezone.now() - last_result_started).total_seconds() / 60) + 1)
        # else:
        #     time_to_check = self.frequency

        output = self.parse_metric()
        result.raw_data = output["raw"]

        # Bail out early when the metric could not be fetched or is empty.
        if output["error"]:
            result.error = output["error"]
            result.succeeded = False
            return result

        if not output["num_series_with_data"]:
            result.error = "Empty result for given metric"
            result.succeeded = False
            return result

        # At least one series has data from here on.
        result.average_value = output['average_value']

        failures = []
        for s in output['series']:
            if not s["values"]:
                continue
            failure_value = None
            if self.check_type == '<':
                if float(s["min"]) < float(self.value):
                    failure_value = s["min"]
            elif self.check_type == '<=':
                if float(s["min"]) <= float(self.value):
                    failure_value = s["min"]
            elif self.check_type == '>':
                if float(s["max"]) > float(self.value):
                    failure_value = s["max"]
            elif self.check_type == '>=':
                if float(s["max"]) >= float(self.value):
                    failure_value = s["max"]
            elif self.check_type == '==':
                if float(self.value) in s['values']:
                    failure_value = float(self.value)
            else:
                raise Exception(u'Check type %s not supported' %
                                self.check_type)

            # Compare with None explicitly: a failing value of 0 is falsy
            # and would otherwise be silently dropped.
            if failure_value is not None:
                failures.append(failure_value)

        if len(failures) > self.allowed_num_failures:
            result.succeeded = False

        elif output['num_series_with_data'] < self.expected_num_hosts:
            result.succeeded = False
        else:
            result.succeeded = True

        if not result.succeeded:
            # targets = [s["target"] for s in output["series"]]
            # hosts = minimize_targets(targets)
            # hosts_by_target = dict(zip(targets, hosts))

            result.error = self.format_error_message(
                failures,
                output['num_series_with_data']
            )

        return result
Example #22
0
class LocalTestCase(TestCase):
    """Base test case that stubs outbound integrations and seeds fixtures.

    ``setUp`` replaces ``requests.get``, ``requests.post``,
    ``rest.TwilioRestClient`` and ``mail.send_mail`` with ``Mock`` objects
    and then calls :meth:`create_dummy_data`.
    """

    def setUp(self):
        """Install the stubs, build the fixture objects, then defer to the base class."""
        # The stubs are assigned on the shared modules themselves. Record the
        # current attribute and register a cleanup that puts it back before
        # overwriting it, so the Mock objects do not leak into tests that run
        # later in the same process.
        for module, attr in (
            (requests, 'get'),
            (requests, 'post'),
            (rest, 'TwilioRestClient'),
            (mail, 'send_mail'),
        ):
            self.addCleanup(setattr, module, attr, getattr(module, attr))
            setattr(module, attr, Mock())
        self.create_dummy_data()
        super(LocalTestCase, self).setUp()

    def create_dummy_data(self):
        """Create the user, checks, service and results shared by the tests.

        Sets ``self.user``, ``self.graphite_check``, ``self.jenkins_check``,
        ``self.http_check``, ``self.service``, ``self.most_recent_result``
        (succeeded) and ``self.older_result`` (failed). Both results belong
        to the graphite check.
        """
        self.username = '******'
        self.password = '******'
        self.user = User.objects.create(username=self.username)
        self.user.set_password(self.password)
        self.user.save()
        self.graphite_check = GraphiteStatusCheck.objects.create(
            name='Graphite Check',
            metric='stats.fake.value',
            check_type='>',
            value='9.0',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
        )
        self.jenkins_check = JenkinsStatusCheck.objects.create(
            name='Jenkins Check',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            max_queued_build_time=10,
        )
        self.http_check = HttpStatusCheck.objects.create(
            name='Http Check',
            created_by=self.user,
            importance=Service.CRITICAL_STATUS,
            endpoint='http://arachnys.com',
            timeout=10,
            status_code='200',
            text_match=None,
        )
        self.service = Service.objects.create(
            name='Service',
        )

        # All three checks are attached to the single service.
        self.service.status_checks.add(
            self.graphite_check, self.jenkins_check, self.http_check)
        # Passing is most recent
        self.most_recent_result = StatusCheckResult(
            check=self.graphite_check,
            time=timezone.now() - timedelta(seconds=1),
            time_complete=timezone.now(),
            succeeded=True
        )
        self.most_recent_result.save()
        # failing is second most recent
        self.older_result = StatusCheckResult(
            check=self.graphite_check,
            time=timezone.now() - timedelta(seconds=60),
            time_complete=timezone.now() - timedelta(seconds=59),
            succeeded=False
        )
        self.older_result.save()
        self.graphite_check.save()  # Will recalculate status
Example #23
0
    def create_dummy_data(self):
        """Create the user, checks, schedules, service and results used by tests.

        Everything is stored on ``self``: ``user``, ``jenkins_check``,
        ``jenkins_check2``, ``http_check``, ``tcp_check``, ``schedule``,
        ``secondary_schedule``, ``service``, ``older_result`` (failed) and
        ``most_recent_result`` (succeeded). Both results belong to the HTTP
        check.
        """
        self.username = '******'
        self.password = '******'
        self.user = User.objects.create(username=self.username)
        self.user.set_password(self.password)
        granted = [
            Permission.objects.get(codename=codename)
            for codename in (
                'add_service',
                'add_httpstatuscheck',
                'add_jenkinsstatuscheck',
                'add_tcpstatuscheck',
            )
        ]
        self.user.user_permissions.add(*granted)
        self.user.save()

        # Settings common to both Jenkins checks; they differ only in id,
        # name and the tolerated number of build failures.
        jenkins_common = dict(
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            max_queued_build_time=10,
        )
        self.jenkins_check = JenkinsStatusCheck.objects.create(
            id=10101,
            name='Jenkins Check',
            max_build_failures=5,
            **jenkins_common)
        self.jenkins_check2 = JenkinsStatusCheck.objects.create(
            id=10104,
            name='Jenkins Check 2',
            max_build_failures=0,
            **jenkins_common)

        self.http_check = HttpStatusCheck.objects.create(
            id=10102,
            name='Http Check',
            created_by=self.user,
            importance=Service.CRITICAL_STATUS,
            endpoint='http://arachnys.com',
            timeout=10,
            status_code='200',
            text_match=None)
        self.tcp_check = TCPStatusCheck.objects.create(
            id=10103,
            name='TCP Check',
            created_by=self.user,
            importance=Service.ERROR_STATUS,
            address='github.com',
            port=80,
            timeout=6)

        # Set ical_url for schedule to filename we're using for mock response
        self.schedule = Schedule.objects.create(
            name='Principal', ical_url='calendar_response.ics')
        self.secondary_schedule = Schedule.objects.create(
            name='Secondary',
            ical_url='calendar_response_different.ics',
            fallback_officer=self.user)
        for schedule in (self.schedule, self.secondary_schedule):
            schedule.save()

        self.service = Service.objects.create(id=2194, name='Service')
        self.service.save()
        self.service.schedules.add(self.schedule)
        # jenkins_check2 is created above but not attached to the service.
        attached_checks = (self.jenkins_check, self.http_check, self.tcp_check)
        self.service.status_checks.add(*attached_checks)

        # Failing is second most recent
        self.older_result = StatusCheckResult(
            status_check=self.http_check,
            succeeded=False,
            time=timezone.now() - timedelta(seconds=60),
            time_complete=timezone.now() - timedelta(seconds=59))
        self.older_result.save()
        # Passing is most recent
        self.most_recent_result = StatusCheckResult(
            status_check=self.http_check,
            succeeded=True,
            time=timezone.now() - timedelta(seconds=1),
            time_complete=timezone.now())
        self.most_recent_result.save()
        self.http_check.save()  # Will recalculate status
Example #24
0
class LocalTestCase(APITestCase):
    """Base API test case that stubs outbound integrations and seeds fixtures.

    ``setUp`` replaces ``requests.get``, ``requests.post``,
    ``rest.TwilioRestClient`` and ``mail.send_mail`` with ``Mock`` objects
    and then calls :meth:`create_dummy_data`.
    """

    def setUp(self):
        """Install the stubs, build the fixture objects, then defer to the base class."""
        # The stubs are assigned on the shared modules themselves. Record the
        # current attribute and register a cleanup that puts it back before
        # overwriting it, so the Mock objects do not leak into tests that run
        # later in the same process.
        for module, attr in (
            (requests, "get"),
            (requests, "post"),
            (rest, "TwilioRestClient"),
            (mail, "send_mail"),
        ):
            self.addCleanup(setattr, module, attr, getattr(module, attr))
            setattr(module, attr, Mock())
        self.create_dummy_data()
        super(LocalTestCase, self).setUp()

    def create_dummy_data(self):
        """Create the user, checks, service and results shared by the tests.

        Sets ``self.user`` (granted six ``add_*`` permissions),
        ``self.graphite_check``, ``self.jenkins_check``, ``self.http_check``,
        ``self.service``, ``self.older_result`` (failed) and
        ``self.most_recent_result`` (succeeded). Both results belong to the
        graphite check.
        """
        self.username = "******"
        self.password = "******"
        self.user = User.objects.create(username=self.username)
        self.user.set_password(self.password)
        self.user.user_permissions.add(
            Permission.objects.get(codename="add_instance"),
            Permission.objects.get(codename="add_service"),
            Permission.objects.get(codename="add_httpstatuscheck"),
            Permission.objects.get(codename="add_graphitestatuscheck"),
            Permission.objects.get(codename="add_jenkinsstatuscheck"),
            Permission.objects.get(codename="add_icmpstatuscheck"),
        )
        self.user.save()
        self.graphite_check = GraphiteStatusCheck.objects.create(
            name="Graphite Check",
            metric="stats.fake.value",
            check_type=">",
            value="9.0",
            created_by=self.user,
            importance=Service.ERROR_STATUS,
        )
        self.jenkins_check = JenkinsStatusCheck.objects.create(
            name="Jenkins Check", created_by=self.user, importance=Service.ERROR_STATUS, max_queued_build_time=10
        )
        self.http_check = HttpStatusCheck.objects.create(
            name="Http Check",
            created_by=self.user,
            importance=Service.CRITICAL_STATUS,
            endpoint="http://arachnys.com",
            timeout=10,
            status_code="200",
            text_match=None,
        )
        self.service = Service.objects.create(name="Service")

        # All three checks are attached to the single service.
        self.service.status_checks.add(self.graphite_check, self.jenkins_check, self.http_check)
        # failing is second most recent
        self.older_result = StatusCheckResult(
            check=self.graphite_check,
            time=timezone.now() - timedelta(seconds=60),
            time_complete=timezone.now() - timedelta(seconds=59),
            succeeded=False,
        )
        self.older_result.save()
        # Passing is most recent
        self.most_recent_result = StatusCheckResult(
            check=self.graphite_check,
            time=timezone.now() - timedelta(seconds=1),
            time_complete=timezone.now(),
            succeeded=True,
        )
        self.most_recent_result.save()
        self.graphite_check.save()  # Will recalculate status
Example #25
0
    def _run(self):
        """Compare the configured CloudWatch statistic against the threshold.

        Fetches datapoints for the last ``self.frequency`` minutes and
        applies ``self.check_type`` with ``self.value`` to the
        ``self.statistic`` field of each datapoint.

        Returns:
            A ``StatusCheckResult`` whose ``succeeded`` flag is False when
            the client cannot be created, when no datapoints come back, or
            when at least one datapoint matches the comparison; True
            otherwise.

        Raises:
            Exception: if ``self.check_type`` is not one of
                ``<``, ``<=``, ``>``, ``>=``, ``==``.
        """
        outcome = StatusCheckResult(status_check=self)

        try:
            client = get_boto_client(self.cloudwatch_config)
        except Exception as e:
            outcome.succeeded = False
            outcome.error = u"Couldn't create cloudwatch client: {}".format(e)
            return outcome

        # cloudwatch_metric is expected in "<namespace>:<metric name>" form.
        namespace, metric_name = self.cloudwatch_metric.split(":")
        start_time = datetime.now() - timedelta(minutes=self.frequency)
        end_time = datetime.now()
        resp = client.get_metric_statistics(
            Namespace=namespace,
            MetricName=metric_name,
            Dimensions=self.parsed_dimensions(),
            StartTime=start_time,
            EndTime=end_time,
            Period=60,
            Statistics=['SampleCount','Average','Sum','Minimum','Maximum',],
        )

        datapoints = resp['Datapoints']
        if len(datapoints) == 0:
            outcome.succeeded = False
            outcome.error = u"No datapoints"
            return outcome

        # One predicate per supported check type; each answers "does this
        # reading violate the limit?".
        predicates = {
            '<': lambda reading, limit: reading < limit,
            '<=': lambda reading, limit: reading <= limit,
            '>': lambda reading, limit: reading > limit,
            '>=': lambda reading, limit: reading >= limit,
            '==': lambda reading, limit: limit == reading,
        }

        readings = [point[self.statistic] for point in datapoints]
        breaches = []
        for reading in readings:
            violates = predicates.get(self.check_type)
            if violates is None:
                raise Exception(u'Check type %s not supported' %
                                self.check_type)
            limit = float(self.value)
            if not violates(reading, limit):
                continue
            # An equality match reports the threshold itself; every other
            # check type reports the offending reading.
            offending = limit if self.check_type == '==' else reading
            if offending is not None:
                breaches.append(offending)

        if breaches:
            outcome.succeeded = False
            outcome.error = u"{} {} {}".format(breaches, self.check_type, self.value)
            return outcome

        outcome.succeeded = True
        return outcome
 def _run(self):
     """Look up the monitor and translate its state into a check result.

     Calls ``checkIfMonitorIdExists`` and ``findMonitor``, then fills in
     ``succeeded``, ``error`` and ``raw_data`` on a new
     ``StatusCheckResult`` according to the HTTP status code of the
     response. Any exception raised along the way is caught and turned
     into a failed result.
     """
     outcome = StatusCheckResult(status_check=self)
     try:
         self.checkIfMonitorIdExists()
         response = self.findMonitor()
         status = response.status_code

         if status == 401 or status == 404:
             # Both codes share one error message; only raw_data differs.
             outcome.raw_data = '401 UNAUTHORIZED' if status == 401 else '404 NOT FOUND'
             outcome.succeeded = False
             outcome.error = u"Cant find monitor process {} with id: {}. Probably it was deleted.".format(self.monitor_name,self.monitor_id)
         elif status == 200:
             details = response.json().get('monitorDetails')
             if details.get('isDown'):
                 outcome.succeeded = False
                 outcome.error = u"Monitor process {} is down! Please checkin using URL: {}".format(self.monitor_name,self.monitor_checkin)
                 outcome.raw_data = self.buildRawData(details)
             else:
                 outcome.succeeded = True
                 outcome.error = 'None'
                 outcome.raw_data = 'Monitor is alive!'
         else:
             # NOTE(review): any other status code is recorded as a success
             # even though the error text says otherwise - confirm that this
             # is intentional.
             outcome.succeeded = True
             outcome.error = 'Unexpected response!'
             outcome.raw_data = u'Response code is: {}'.format(status)
     except Exception as e:
         # NOTE(review): e.args is a tuple, not a message string - confirm
         # that consumers of error/raw_data expect that.
         outcome.succeeded = False
         outcome.error = e.args
         outcome.raw_data = e.args
     return outcome