def test_it_calculates_downtime_summary(self, mock_now):
    # With "now" pinned to Feb 1, 2020 and a single one-hour outage recorded
    # on Jan 15, the page at self.url should mention Feb. 2020, Jan. 2020 and
    # Dec. 2019, and contain the "1 downtime, 1 hour total" summary.
    utc = timezone.utc
    mock_now.return_value = datetime(2020, 2, 1, tzinfo=utc)

    self.check.created = datetime(2019, 1, 1, 0, 0, 0, tzinfo=utc)
    self.check.save()

    # (timestamp, old status, new status):
    # going down on Jan 15 at 12:00, back up on Jan 15 at 13:00.
    transitions = (
        (datetime(2020, 1, 15, 12, 0, 0, tzinfo=utc), "up", "down"),
        (datetime(2020, 1, 15, 13, 0, 0, tzinfo=utc), "down", "up"),
    )
    for moment, before, after in transitions:
        Flip(
            owner=self.check,
            created=moment,
            old_status=before,
            new_status=after,
        ).save()

    self.client.login(username="******", password="******")
    response = self.client.get(self.url)

    for month_label in ("Feb. 2020", "Jan. 2020", "Dec. 2019"):
        self.assertContains(response, month_label)

    # The summary for Jan. 2020 should be "1 downtime, 1 hour total"
    self.assertContains(response, "1 downtime, 1 hour total", html=True)
def handle_going_down(self):
    """Process a single check going down.

    Picks one check whose status is "up" and whose alert_after lies in the
    past. If get_status() does not report "down" yet, only alert_after is
    refreshed. Otherwise the status is switched to "down" with a conditional
    UPDATE and a Flip recording the "up" -> "down" transition is saved.

    Returns:
        False when no candidate check was found, True otherwise (including
        the case where another worker handled the same check first).
    """
    now = timezone.now()
    check = Check.objects.filter(alert_after__lt=now, status="up").first()
    if check is None:
        return False

    # Filter on status again so the updates below touch the row only if it
    # is still "up" at the time the UPDATE runs.
    q = Check.objects.filter(id=check.id, status="up")

    if check.get_status() != "down":
        # It is not down yet. Update alert_after
        q.update(alert_after=check.get_alert_after())
        return True

    # Atomically update status
    num_updated = q.update(status="down")
    if num_updated != 1:
        # Nothing got updated: another worker process got there first.
        return True

    flip = Flip(owner=check)
    flip.created = check.get_alert_after()
    flip.old_status = "up"
    flip.new_status = "down"
    flip.save()

    # The conditional UPDATE above has already persisted status="down".
    # A follow-up check.save() is deliberately not issued: it would write
    # every column from the snapshot loaded at the top of this method and
    # could overwrite changes made to the row in the meantime.
    return True
def test_it_clears_next_nag_date(self):
    # Both profiles start with a nag period and a next_nag_date in the past.
    # After notify() handles a "down" -> "up" flip, next_nag_date is expected
    # to be None on both of them.
    profiles = (self.profile, self.bobs_profile)
    for profile in profiles:
        profile.nag_period = td(hours=1)
        profile.next_nag_date = now() - td(minutes=30)
        profile.save()

    check = Check(project=self.project, status="up", last_ping=now())
    check.save()

    flip = Flip(
        owner=check,
        created=check.last_ping,
        old_status="down",
        new_status="up",
    )
    flip.save()

    notify(flip.id, Mock())

    # next_nag_date should now be cleared out, first for the project's
    # owner (self.profile), then for the project's member (self.bobs_profile).
    for profile in profiles:
        profile.refresh_from_db()
        self.assertIsNone(profile.next_nag_date)
def test_it_returns_num_unprocessed_flips(self):
    # After saving exactly one new flip, the JSON document served at
    # self.url should report num_unprocessed_flips == 1.
    check = Check.objects.create(project=self.project, status="down")
    Flip(
        owner=check,
        created=now(),
        old_status="up",
        new_status="down",
    ).save()

    response = self.client.get(self.url, HTTP_X_METRICS_KEY="foo")
    self.assertEqual(response.status_code, 200)

    payload = response.json()
    self.assertEqual(payload["num_unprocessed_flips"], 1)
def test_it_updates_members_next_nag_date(self):
    # Bob has a nag period configured; after notify() handles an
    # "up" -> "down" flip, his next_nag_date is expected to be set.
    bob = self.bobs_profile
    bob.nag_period = td(hours=1)
    bob.save()

    check = Check(
        project=self.project,
        status="down",
        last_ping=now() - td(days=2),
    )
    check.save()

    flip = Flip(
        owner=check,
        created=check.last_ping,
        old_status="up",
        new_status="down",
    )
    flip.save()

    notify(flip.id, Mock())

    bob.refresh_from_db()
    self.assertIsNotNone(bob.next_nag_date)
def test_it_updates_owners_next_nag_date(self):
    # self.profile has a nag period configured; after notify() handles an
    # "up" -> "down" flip on alice's check, next_nag_date should be set.
    owner_profile = self.profile
    owner_profile.nag_period = timedelta(hours=1)
    owner_profile.save()

    check = Check(
        user=self.alice,
        status="down",
        last_ping=now() - timedelta(days=2),
    )
    # alert_after is derived via get_alert_after() once last_ping is in place.
    check.alert_after = check.get_alert_after()
    check.save()

    flip = Flip(
        owner=check,
        created=check.last_ping,
        old_status="up",
        new_status="down",
    )
    flip.save()

    notify(flip.id, Mock())

    owner_profile.refresh_from_db()
    self.assertIsNotNone(owner_profile.next_nag_date)
def test_it_does_not_touch_already_set_next_nag_dates(self):
    # A next_nag_date that is already set must survive notify() unchanged
    # when an "up" -> "down" flip is handled.
    original_nag_date = now() - td(minutes=30)

    owner_profile = self.profile
    owner_profile.nag_period = td(hours=1)
    owner_profile.next_nag_date = original_nag_date
    owner_profile.save()

    check = Check(
        project=self.project,
        status="down",
        last_ping=now() - td(days=2),
    )
    check.save()

    flip = Flip(
        owner=check,
        created=check.last_ping,
        old_status="up",
        new_status="down",
    )
    flip.save()

    notify(flip.id, Mock())

    owner_profile.refresh_from_db()
    self.assertEqual(owner_profile.next_nag_date, original_nag_date)
def test_downtimes_handles_flip_one_day_ago(self):
    # A check that went down on Jan 14, 2020 should show one outage of
    # 86400 seconds in the January row of downtimes(10) and zeros elsewhere.
    # NOTE(review): nothing in this test pins the current time; the 86400
    # second expectation presumably assumes "now" is Jan 15, 2020 -- confirm
    # whether the clock is mocked elsewhere.
    utc = timezone.utc

    check = Check.objects.create(project=self.project, status="down")
    # Set on the in-memory object only; the check is not saved again.
    check.created = datetime(2019, 1, 1, tzinfo=utc)

    Flip(
        owner=check,
        created=datetime(2020, 1, 14, tzinfo=utc),
        old_status="up",
        new_status="down",
    ).save()

    rows = check.downtimes(10)
    self.assertEqual(len(rows), 10)

    for dt, downtime, outages in rows:
        if dt.month == 1:
            expected_seconds, expected_outages = 86400, 1
        else:
            expected_seconds, expected_outages = 0, 0

        self.assertEqual(downtime.total_seconds(), expected_seconds)
        self.assertEqual(outages, expected_outages)
def handle_going_down(self):
    """Process a single check going down.

    Selects the check with the earliest alert_after that is already in the
    past and whose status is not "down". Depending on what get_status()
    reports, either alert_after is refreshed or the check is switched to
    "down" and a Flip describing the transition is saved.

    Returns:
        False when no candidate check was found, True otherwise.

    Raises:
        Whatever get_status() raises; before re-raising, alert_after is
        pushed one hour into the future.
    """
    now = timezone.now()

    overdue = Check.objects.filter(alert_after__lt=now).exclude(status="down")
    # Sort by alert_after, to avoid unnecessary sorting by id:
    check = overdue.order_by("alert_after").first()
    if check is None:
        return False

    old_status = check.status
    # Matches the row only while it still carries the status read above, so
    # every update issued through it is conditional on that status.
    guarded = Check.objects.filter(id=check.id, status=old_status)

    try:
        status = check.get_status()
    except Exception as e:
        # Make sure we don't trip on this check again for an hour:
        # Otherwise sendalerts may end up in a crash loop.
        guarded.update(alert_after=now + td(hours=1))
        # Then re-raise the exception:
        raise e

    if status != "down":
        # It is not down yet. Update alert_after
        guarded.update(alert_after=check.going_down_after())
    else:
        # Capture the flip time first, then atomically update status.
        flip_time = check.going_down_after()
        rows_changed = guarded.update(alert_after=None, status="down")
        # rows_changed != 1 means nothing got updated: another worker
        # process got there first, so no Flip is recorded here.
        if rows_changed == 1:
            Flip(
                owner=check,
                created=flip_time,
                old_status=old_status,
                new_status="down",
            ).save()

    return True
def update_timeout(request, code):
    """Apply the schedule settings submitted via POST to one check.

    The POST field "kind" selects the form: "simple" validates a TimeoutForm
    and stores timeout/grace, "cron" validates a CronForm and stores
    schedule/tz/grace (grace given in minutes). An invalid form yields an
    HttpResponseBadRequest. Any other "kind" leaves the schedule fields
    untouched but still runs the steps below.

    Afterwards alert_after is recomputed when the check has a last_ping, a
    Flip to "up" is recorded if the new settings make the check up again,
    and the check is saved. Redirects to "hc-details" when the referer
    contains "/details/", otherwise to "hc-checks".
    """
    check = _get_check_for_user(request, code)

    kind = request.POST.get("kind")
    if kind == "simple":
        form = TimeoutForm(request.POST)
        if not form.is_valid():
            return HttpResponseBadRequest()

        cleaned = form.cleaned_data
        check.kind = "simple"
        check.timeout = cleaned["timeout"]
        check.grace = cleaned["grace"]
    elif kind == "cron":
        form = CronForm(request.POST)
        if not form.is_valid():
            return HttpResponseBadRequest()

        cleaned = form.cleaned_data
        check.kind = "cron"
        check.schedule = cleaned["schedule"]
        check.tz = cleaned["tz"]
        check.grace = td(minutes=cleaned["grace"])

    if check.last_ping:
        check.alert_after = check.get_alert_after()

    # Changing timeout can change check's status:
    computed_status = check.get_status()
    if computed_status in ("up", "grace") and check.status != "up":
        Flip(
            owner=check,
            created=timezone.now(),
            old_status=check.status,
            new_status="up",
        ).save()
        check.status = "up"

    check.save()

    referer = request.META.get("HTTP_REFERER", "")
    if "/details/" in referer:
        return redirect("hc-details", code)

    return redirect("hc-checks")
def test_it_processes_flip(self, mock_notify):
    # With one saved flip present, process_one_flip() should report that it
    # found work, set the flip's processed value and invoke the mocked
    # notifier.
    pinged_at = now()

    check = Check(project=self.project, status="up")
    check.last_ping = pinged_at
    check.alert_after = pinged_at + td(days=1, hours=1)
    check.save()

    flip = Flip(
        owner=check,
        created=check.last_ping,
        old_status="down",
        new_status="up",
    )
    flip.save()

    found_work = Command().process_one_flip()

    # If it finds work, it should return True
    self.assertTrue(found_work)

    # It should set the processed date
    flip.refresh_from_db()
    self.assertTrue(flip.processed)

    # It should call `notify_on_thread`
    self.assertTrue(mock_notify.called)
def test_downtimes_handles_flip_two_months_ago(self, mock_now):
    # "Now" is pinned to Jul 19, 2019 and the check went down on May 19.
    # May, June and July should each report one outage, June should be down
    # for a full 30 days, and every other row should be all zeros.
    utc = timezone.utc
    mock_now.return_value = datetime(2019, 7, 19, tzinfo=utc)

    check = Check.objects.create(project=self.project, status="down")
    Flip(
        owner=check,
        created=datetime(2019, 5, 19, tzinfo=utc),
        old_status="up",
        new_status="down",
    ).save()

    rows = check.downtimes(10)
    self.assertEqual(len(rows), 10)

    for dt, downtime, outages in rows:
        month = dt.month

        # June lies entirely inside the outage.
        if month == 6:
            self.assertEqual(downtime.total_seconds(), 30 * 86400)

        if month in (5, 6, 7):
            self.assertEqual(outages, 1)
        else:
            self.assertEqual(downtime.total_seconds(), 0)
            self.assertEqual(outages, 0)
def handle_going_down(self):
    """Process a single check going down.

    Picks the "up" check with the earliest alert_after that already lies in
    the past. If get_status() does not report "down" yet, only alert_after
    is refreshed. Otherwise the status is switched to "down" with a
    conditional UPDATE and a Flip recording the "up" -> "down" transition
    is saved.

    Returns:
        False when no candidate check was found, True otherwise (including
        the case where another worker handled the same check first).
    """
    now = timezone.now()

    # In PostgreSQL, add this index to run the below query efficiently:
    # CREATE INDEX api_check_up ON api_check (alert_after) WHERE status = 'up'
    q = Check.objects.filter(alert_after__lt=now, status="up")
    # Sort by alert_after, to avoid unnecessary sorting by id:
    check = q.order_by("alert_after").first()
    if check is None:
        return False

    # Filter on status again so the updates below touch the row only if it
    # is still "up" at the time the UPDATE runs.
    q = Check.objects.filter(id=check.id, status="up")

    if check.get_status() != "down":
        # It is not down yet. Update alert_after
        q.update(alert_after=check.get_alert_after())
        return True

    # Atomically update status
    num_updated = q.update(status="down")
    if num_updated != 1:
        # Nothing got updated: another worker process got there first.
        return True

    flip = Flip(owner=check)
    flip.created = check.get_alert_after()
    flip.old_status = "up"
    flip.new_status = "down"
    flip.save()

    # The conditional UPDATE above has already persisted status="down".
    # A follow-up check.save() is deliberately not issued: it would write
    # every column from the snapshot loaded at the top of this method and
    # could overwrite changes made to the row in the meantime.
    return True