def test_check_smartstack_replication_for_instance_ok_with_enough_replication(
    instance_config,
):
    """An OK event is sent when the checker reports 8 of 8 expected instances.

    The mocked checker returns a single region whose entry for
    "fake_service.fake_instance" equals the expected count; the emitted
    output must contain the "8 out of 8 ... (OK: 100%)" line for the job.
    """
    replication_by_region = {
        "fake_region": {
            "test.canary": 1,
            "test.low_replication": 4,
            "fake_service.fake_instance": 8,
        }
    }
    checker = mock.Mock()
    checker.get_replication_for_instance.return_value = replication_by_region

    with mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    ) as send_event:
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=8,
            smartstack_replication_checker=checker,
        )

    # The mock keeps its recorded calls after the patch context has exited.
    send_event.assert_called_once_with(
        instance_config=instance_config,
        status=pysensu_yelp.Status.OK,
        output=mock.ANY,
    )
    alert_output = send_event.call_args[1]["output"]
    expected_line = (
        "{} has 8 out of 8 expected instances in fake_region (OK: 100%)".format(
            instance_config.job_id
        )
    )
    assert expected_line in alert_output
def test_check_smartstack_replication_for_instance_crit_when_missing_replication_multilocation(
    instance_config,
):
    """A CRITICAL event is sent when two regions both report zero instances.

    With an expected count of 2 and two regions in the checker's answer, the
    output must contain a "0 out of 1" line for each region.
    """
    replication_by_region = {
        "fake_region": {"test.main": 0},
        "fake_other_region": {"test.main": 0},
    }
    checker = mock.Mock()
    checker.get_replication_for_instance.return_value = replication_by_region

    with mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    ) as send_event:
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=2,
            smartstack_replication_checker=checker,
        )

    send_event.assert_called_once_with(
        instance_config=instance_config,
        status=pysensu_yelp.Status.CRITICAL,
        output=mock.ANY,
    )
    alert_output = send_event.call_args[1]["output"]
    expected_lines = (
        "{} has 0 out of 1 expected instances in fake_region".format(
            instance_config.job_id
        ),
        "{} has 0 out of 1 expected instances in fake_other_region".format(
            instance_config.job_id
        ),
    )
    for expected_line in expected_lines:
        assert expected_line in alert_output
def check_service_replication(
    instance_config,
    all_tasks,
    smartstack_replication_checker,
):
    """Run the replication check appropriate for a service instance.

    The smartstack check is used when the instance has a proxy port and its
    primary registration equals its own job id; in every other case the
    healthy marathon tasks are checked instead.

    :param instance_config: an instance of MarathonServiceConfig
    :param all_tasks: passed through unchanged to
        check_healthy_marathon_tasks_for_service_instance
    :param smartstack_replication_checker: an instance of MesosSmartstackReplicationChecker
    """
    expected_count = instance_config.get_instances()
    log.info("Expecting %d total tasks for %s" % (expected_count, instance_config.job_id))
    proxy_port = get_proxy_port_for_instance(instance_config)
    registrations = instance_config.get_registrations()

    # If the primary registration does not match the service_instance name
    # then the best we can do is check marathon for replication (for now).
    # The registration is only inspected once a proxy port is known.
    monitored_by_smartstack = (
        proxy_port is not None and registrations[0] == instance_config.job_id
    )
    if not monitored_by_smartstack:
        check_healthy_marathon_tasks_for_service_instance(
            instance_config=instance_config,
            expected_count=expected_count,
            all_tasks=all_tasks,
        )
        return

    monitoring_tools.check_smartstack_replication_for_instance(
        instance_config=instance_config,
        expected_count=expected_count,
        smartstack_replication_checker=smartstack_replication_checker,
    )
def test_check_smartstack_replication_for_instance_crit_when_low_replication_multilocation(
    instance_config,
):
    """A CRITICAL event is sent when one of two regions is under-replicated.

    The checker reports 1 instance in one region and 0 in the other for an
    expected total of 2; the output must list both per-region lines and the
    "paasta status" command for the instance.
    """
    replication_by_region = {
        'fake_region': {'fake_service.fake_instance': 1},
        'fake_other_region': {'fake_service.fake_instance': 0},
    }
    checker = mock.Mock()
    checker.get_replication_for_instance.return_value = replication_by_region

    with mock.patch(
        'paasta_tools.monitoring_tools.send_replication_event', autospec=True,
    ) as send_event:
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=2,
            smartstack_replication_checker=checker,
        )

    send_event.assert_called_once_with(
        instance_config=instance_config,
        status=pysensu_yelp.Status.CRITICAL,
        output=mock.ANY,
    )
    alert_output = send_event.call_args[1]["output"]

    healthy_region_line = "{} has 1 out of 1 expected instances in fake_region".format(
        instance_config.job_id
    )
    empty_region_line = (
        "{} has 0 out of 1 expected instances in fake_other_region".format(
            instance_config.job_id
        )
    )
    status_command = "paasta status -s {} -i {} -c {} -vv".format(
        instance_config.service,
        instance_config.instance,
        instance_config.cluster,
    )
    assert healthy_region_line in alert_output
    assert empty_region_line in alert_output
    assert status_command in alert_output
def check_kubernetes_pod_replication(
    instance_config: KubernetesDeploymentConfig,
    all_pods: Sequence[V1Pod],
    smartstack_replication_checker: KubeSmartstackReplicationChecker,
) -> None:
    """Run the replication check appropriate for a Kubernetes instance.

    The smartstack check is used when the instance has a proxy port and its
    primary registration equals its own job id; in every other case the
    healthy Kubernetes pods are checked instead.

    :param instance_config: an instance of KubernetesDeploymentConfig
    :param all_pods: passed through unchanged to
        check_healthy_kubernetes_tasks_for_service_instance
    :param smartstack_replication_checker: an instance of KubeSmartstackReplicationChecker
    """
    expected_count = instance_config.get_instances()
    log.info(
        "Expecting %d total tasks for %s" % (expected_count, instance_config.job_id)
    )
    proxy_port = get_proxy_port_for_instance(instance_config)
    registrations = instance_config.get_registrations()

    # If the primary registration does not match the service_instance name
    # then the best we can do is check k8s for replication (for now).
    # The registration is only inspected once a proxy port is known.
    monitored_by_smartstack = (
        proxy_port is not None and registrations[0] == instance_config.job_id
    )
    if not monitored_by_smartstack:
        check_healthy_kubernetes_tasks_for_service_instance(
            instance_config=instance_config,
            expected_count=expected_count,
            all_pods=all_pods,
        )
        return

    monitoring_tools.check_smartstack_replication_for_instance(
        instance_config=instance_config,
        expected_count=expected_count,
        smartstack_replication_checker=smartstack_replication_checker,
    )
def test_check_smartstack_replication_for_instance_crit_when_low_replication(
    instance_config,
):
    """A CRITICAL event is sent when only 4 of 8 expected instances are up.

    The output must contain the "4 out of 8" line for the job and the
    "paasta status" command for the instance.
    """
    replication_by_region = {
        "fake_region": {
            "test.canary": 1,
            "fake_service.fake_instance": 4,
            "test.fully_replicated": 8,
        }
    }
    checker = mock.Mock()
    checker.get_replication_for_instance.return_value = replication_by_region

    with mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    ) as send_event:
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=8,
            smartstack_replication_checker=checker,
        )

    send_event.assert_called_once_with(
        instance_config=instance_config,
        status=pysensu_yelp.Status.CRITICAL,
        output=mock.ANY,
    )
    alert_output = send_event.call_args[1]["output"]

    replication_line = (
        "Service {} has 4 out of 8 expected instances in fake_region".format(
            instance_config.job_id
        )
    )
    status_command = "paasta status -s {} -i {} -c {} -vv".format(
        instance_config.service,
        instance_config.instance,
        instance_config.cluster,
    )
    assert replication_line in alert_output
    assert status_command in alert_output
def test_check_smartstack_replication_for_instance_crit_when_absent(instance_config):
    """A CRITICAL event is sent for the given smartstack data.

    The checker's answer only lists "test.two", "test.three" and "test.four"
    while 8 instances are expected; only the event status is asserted.
    """
    replication_by_region = {
        "fake_region": {"test.two": 1, "test.three": 4, "test.four": 8}
    }
    checker = mock.Mock()
    checker.get_replication_for_instance.return_value = replication_by_region

    with mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    ) as send_event:
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=8,
            smartstack_replication_checker=checker,
        )

    send_event.assert_called_once_with(
        instance_config=instance_config,
        status=pysensu_yelp.Status.CRITICAL,
        output=mock.ANY,
    )
def test_check_smartstack_replication_for_instance_ok_when_expecting_zero(
    instance_config,
):
    """An OK event is sent when the expected instance count is zero.

    Only the event status is asserted; the output text is not inspected.
    """
    replication_by_region = {
        'fake_region': {'test.main': 1, 'test.three': 4, 'test.four': 8},
    }
    checker = mock.Mock()
    checker.get_replication_for_instance.return_value = replication_by_region

    with mock.patch(
        'paasta_tools.monitoring_tools.send_replication_event', autospec=True,
    ) as send_event:
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=0,
            smartstack_replication_checker=checker,
        )

    send_event.assert_called_once_with(
        instance_config=instance_config,
        status=pysensu_yelp.Status.OK,
        output=mock.ANY,
    )
def test_check_smartstack_replication_for_instance_crit_when_no_smartstack_info(
    instance_config,
):
    """A CRITICAL event is sent when the checker returns an empty mapping.

    The output must state that the job has no Smartstack replication info.
    """
    checker = mock.Mock()
    checker.get_replication_for_instance.return_value = {}

    with mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    ) as send_event:
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=2,
            smartstack_replication_checker=checker,
        )

    send_event.assert_called_once_with(
        instance_config=instance_config,
        status=pysensu_yelp.Status.CRITICAL,
        output=mock.ANY,
    )
    alert_output = send_event.call_args[1]["output"]
    expected_line = f"{instance_config.job_id} has no Smartstack replication info."
    assert expected_line in alert_output
def check_kubernetes_pod_replication(
    instance_config: KubernetesDeploymentConfig,
    all_tasks_or_pods: Sequence[V1Pod],
    smartstack_replication_checker: KubeSmartstackReplicationChecker,
    default_alert_after: Optional[str] = DEFAULT_ALERT_AFTER,
) -> Optional[bool]:
    """Run the replication check appropriate for a Kubernetes instance.

    Before checking, the instance's monitoring config is given an
    "alert_after" value if it does not already have one. The smartstack check
    is then used when the instance has a proxy port and its primary
    registration equals its own job id; otherwise the healthy Kubernetes pods
    are checked instead.

    :param instance_config: an instance of KubernetesDeploymentConfig; its
        config_dict["monitoring"] entry is created and/or updated in place
    :param all_tasks_or_pods: passed as ``all_pods`` to
        check_healthy_kubernetes_tasks_for_service_instance
    :param smartstack_replication_checker: an instance of KubeSmartstackReplicationChecker
    :param default_alert_after: "alert_after" value used when the monitoring
        config has none; replaced by "20m" for a non-autoscaled instance whose
        instance count is 1
    :returns: whatever check_smartstack_replication_for_instance returns when
        the smartstack path is taken, otherwise None
    """
    expected_count = instance_config.get_instances()
    log.info(
        "Expecting %d total tasks for %s" % (expected_count, instance_config.job_id)
    )
    proxy_port = get_proxy_port_for_instance(instance_config)
    registrations = instance_config.get_registrations()

    # If this instance does not autoscale and only has 1 instance, set alert
    # after to 20m. Otherwise keep the default handed in by the caller.
    single_fixed_instance = (
        not instance_config.is_autoscaling_enabled()
        and instance_config.get_instances() == 1
    )
    if single_fixed_instance:
        default_alert_after = "20m"

    # An explicitly configured alert_after always wins over the default.
    config = instance_config.config_dict
    if "monitoring" not in config:
        config["monitoring"] = {}
    monitoring_config = config["monitoring"]
    monitoring_config["alert_after"] = monitoring_config.get(
        "alert_after", default_alert_after
    )

    # If the primary registration does not match the service_instance name
    # then the best we can do is check k8s for replication (for now).
    monitored_by_smartstack = (
        proxy_port is not None and registrations[0] == instance_config.job_id
    )
    if not monitored_by_smartstack:
        check_healthy_kubernetes_tasks_for_service_instance(
            instance_config=instance_config,
            expected_count=expected_count,
            all_pods=all_tasks_or_pods,
        )
        return None

    return monitoring_tools.check_smartstack_replication_for_instance(
        instance_config=instance_config,
        expected_count=expected_count,
        smartstack_replication_checker=smartstack_replication_checker,
    )
def test_check_smartstack_replication_for_instance_emits_metrics(instance_config):
    """The replication check forwards its data to emit_replication_metrics.

    emit_replication_metrics must be called exactly once with the checker's
    replication info, the instance config and the expected count.
    """
    replication_by_region = {"fake_region": {"fake_service.fake_instance": 10}}

    with mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    ), mock.patch(
        "paasta_tools.monitoring_tools.yelp_meteorite", autospec=True
    ), mock.patch(
        "paasta_tools.monitoring_tools.emit_replication_metrics", autospec=True
    ) as emit_metrics:
        checker = mock.Mock()
        checker.get_replication_for_instance.return_value = replication_by_region
        monitoring_tools.check_smartstack_replication_for_instance(
            instance_config=instance_config,
            expected_count=10,
            smartstack_replication_checker=checker,
        )

    emit_metrics.assert_called_once_with(
        checker.get_replication_for_instance.return_value,
        instance_config,
        10,
    )