예제 #1
0
    def test_it_calculates_downtime_summary(self, mock_now):
        """The page at ``self.url`` summarizes downtime per month.

        "Now" is pinned to Feb 1, 2020 and a single one-hour outage is
        recorded on Jan 15, 2020. The response must list the three most
        recent months and report that outage.
        """
        utc = timezone.utc
        mock_now.return_value = datetime(2020, 2, 1, tzinfo=utc)

        self.check.created = datetime(2019, 1, 1, 0, 0, 0, tzinfo=utc)
        self.check.save()

        # One outage on Jan 15: going down at 12:00, back up at 13:00.
        transitions = (
            (datetime(2020, 1, 15, 12, 0, 0, tzinfo=utc), "up", "down"),
            (datetime(2020, 1, 15, 13, 0, 0, tzinfo=utc), "down", "up"),
        )
        for moment, before, after in transitions:
            Flip(
                owner=self.check,
                created=moment,
                old_status=before,
                new_status=after,
            ).save()

        self.client.login(username="******", password="******")
        response = self.client.get(self.url)
        for month_label in ("Feb. 2020", "Jan. 2020", "Dec. 2019"):
            self.assertContains(response, month_label)

        # The summary for Jan. 2020 should be "1 downtime, 1 hour total"
        self.assertContains(response, "1 downtime, 1 hour total", html=True)
예제 #2
0
    def handle_going_down(self):
        """Process a single check going down.

        Picks one check whose ``alert_after`` is in the past and whose stored
        status is "up". Returns False when there is no such check, and True
        in every other case (whether or not a flip was recorded).
        """
        current_time = timezone.now()

        candidate = Check.objects.filter(
            alert_after__lt=current_time, status="up"
        ).first()
        if candidate is None:
            return False

        # Every write below is restricted to the row while it is still "up".
        still_up = Check.objects.filter(id=candidate.id, status="up")

        if candidate.get_status() != "down":
            # It is not down yet. Update alert_after
            still_up.update(alert_after=candidate.get_alert_after())
            return True

        # Atomically update status
        if still_up.update(status="down") != 1:
            # Nothing got updated: another worker process got there first.
            return True

        # Record the up -> down transition.
        Flip(
            owner=candidate,
            created=candidate.get_alert_after(),
            old_status="up",
            new_status="down",
        ).save()

        # NOTE(review): this writes the in-memory copy back after the
        # conditional update above - confirm the full save is still needed.
        candidate.status = "down"
        candidate.save()
        return True
예제 #3
0
    def test_it_returns_num_unprocessed_flips(self):
        """The metrics response counts a freshly saved flip as unprocessed."""
        down_check = Check.objects.create(project=self.project, status="down")
        Flip(
            owner=down_check,
            created=now(),
            old_status="up",
            new_status="down",
        ).save()

        response = self.client.get(self.url, HTTP_X_METRICS_KEY="foo")
        self.assertEqual(response.status_code, 200)

        payload = response.json()
        self.assertEqual(payload["num_unprocessed_flips"], 1)
예제 #4
0
    def test_downtimes_handles_flip_one_day_ago(self):
        """``downtimes(10)`` books a single up -> down flip under January.

        Ten entries are expected. The January entry must show one outage
        lasting 86400 seconds; every other entry must be empty.
        """
        check = Check.objects.create(project=self.project, status="down")
        # Set on the in-memory instance only; the check is not re-saved.
        check.created = datetime(2019, 1, 1, tzinfo=timezone.utc)

        Flip(
            owner=check,
            created=datetime(2020, 1, 14, tzinfo=timezone.utc),
            old_status="up",
            new_status="down",
        ).save()

        rows = check.downtimes(10)
        self.assertEqual(len(rows), 10)
        for month_start, downtime, outages in rows:
            # (seconds, outage count) expected for this month
            want_seconds, want_outages = (
                (86400, 1) if month_start.month == 1 else (0, 0)
            )
            self.assertEqual(downtime.total_seconds(), want_seconds)
            self.assertEqual(outages, want_outages)
예제 #5
0
    def handle_going_down(self):
        """Process a single check going down.

        Picks the check with the earliest ``alert_after`` that is already in
        the past and whose stored status is not "down".

        Returns:
            False when no such check exists; True otherwise, including when
            the check turned out not to be down yet or when the status
            update affected no rows.

        Raises:
            Exception: whatever ``check.get_status()`` raised, re-raised
                after the check's ``alert_after`` was pushed one hour ahead.
        """

        now = timezone.now()

        q = Check.objects.filter(alert_after__lt=now).exclude(status="down")
        # Sort by alert_after, to avoid unnecessary sorting by id:
        check = q.order_by("alert_after").first()
        if check is None:
            return False

        # Every write below is conditional on the status still being the one
        # we just read.
        old_status = check.status
        q = Check.objects.filter(id=check.id, status=old_status)

        try:
            status = check.get_status()
        except Exception:
            # Make sure we don't trip on this check again for an hour:
            # Otherwise sendalerts may end up in a crash loop.
            q.update(alert_after=now + td(hours=1))
            # Then re-raise the active exception unchanged:
            raise

        if status != "down":
            # It is not down yet. Update alert_after
            q.update(alert_after=check.going_down_after())
            return True

        # Atomically update status
        flip_time = check.going_down_after()
        num_updated = q.update(alert_after=None, status="down")
        if num_updated != 1:
            # Nothing got updated: another worker process got there first.
            return True

        # Record the transition, timestamped with the value computed before
        # the update above.
        flip = Flip(owner=check)
        flip.created = flip_time
        flip.old_status = old_status
        flip.new_status = "down"
        flip.save()

        return True
예제 #6
0
def update_timeout(request, code):
    """Apply a posted schedule change to a check, then redirect.

    The POST field ``kind`` selects the form: "simple" validates with
    ``TimeoutForm``, "cron" with ``CronForm``. An invalid form yields
    ``HttpResponseBadRequest``. Any other ``kind`` leaves the schedule
    fields untouched, but the check is still saved.

    The redirect goes to "hc-details" when the referer contains
    "/details/", and to "hc-checks" otherwise.
    """
    check = _get_check_for_user(request, code)
    kind = request.POST.get("kind")

    form_cls = {"simple": TimeoutForm, "cron": CronForm}.get(kind)
    if form_cls is not None:
        form = form_cls(request.POST)
        if not form.is_valid():
            return HttpResponseBadRequest()

        cleaned = form.cleaned_data
        check.kind = kind
        if kind == "simple":
            check.timeout = cleaned["timeout"]
            check.grace = cleaned["grace"]
        else:
            check.schedule = cleaned["schedule"]
            check.tz = cleaned["tz"]
            # The cron form's grace value is passed to td() as minutes.
            check.grace = td(minutes=cleaned["grace"])

    if check.last_ping:
        check.alert_after = check.get_alert_after()

        # Changing timeout can change check's status:
        effective_status = check.get_status()
        if effective_status in ("up", "grace") and check.status != "up":
            Flip(
                owner=check,
                created=timezone.now(),
                old_status=check.status,
                new_status="up",
            ).save()

            check.status = "up"

    check.save()

    referer = request.META.get("HTTP_REFERER", "")
    if "/details/" in referer:
        return redirect("hc-details", code)

    return redirect("hc-checks")
예제 #7
0
    def test_downtimes_handles_flip_two_months_ago(self, mock_now):
        """``downtimes(10)`` spreads one long outage over three months.

        "Now" is pinned to Jul 19, 2019 and the check went down on May 19,
        2019. May, June and July must each report one outage, and June must
        be down for a full 30 days. Every other entry must be empty.
        """
        mock_now.return_value = datetime(2019, 7, 19, tzinfo=timezone.utc)

        check = Check.objects.create(project=self.project, status="down")
        Flip(
            owner=check,
            created=datetime(2019, 5, 19, tzinfo=timezone.utc),
            old_status="up",
            new_status="down",
        ).save()

        rows = check.downtimes(10)
        self.assertEqual(len(rows), 10)
        for month_start, downtime, outages in rows:
            month = month_start.month
            if month in (5, 6, 7):
                # Only June is fully covered, so only its duration is pinned.
                if month == 6:
                    self.assertEqual(downtime.total_seconds(), 30 * 86400)
                self.assertEqual(outages, 1)
            else:
                self.assertEqual(downtime.total_seconds(), 0)
                self.assertEqual(outages, 0)
예제 #8
0
    def handle_going_down(self):
        """Process a single check going down.

        Takes the "up" check with the earliest ``alert_after`` that is
        already in the past. Returns False when there is none, True
        otherwise.
        """
        moment = timezone.now()

        # In PostgreSQL, add this index to run the below query efficiently:
        # CREATE INDEX api_check_up ON api_check (alert_after) WHERE status = 'up'

        overdue = Check.objects.filter(alert_after__lt=moment, status="up")
        # Sort by alert_after, to avoid unnecessary sorting by id:
        target = overdue.order_by("alert_after").first()
        if target is None:
            return False

        # Writes below only touch the row while its status is still "up".
        guarded = Check.objects.filter(id=target.id, status="up")

        computed_status = target.get_status()
        if computed_status == "down":
            # Atomically update status
            rows_changed = guarded.update(status="down")
            if rows_changed == 1:
                transition = Flip(owner=target)
                transition.created = target.get_alert_after()
                transition.old_status = "up"
                transition.new_status = "down"
                transition.save()

                target.status = "down"
                target.save()
            # Otherwise nothing got updated: another worker process got
            # there first.
        else:
            # It is not down yet. Update alert_after
            guarded.update(alert_after=target.get_alert_after())

        return True