def test_check_replication_for_instance_crit_when_missing_replication_multilocation(
    instance_config,
):
    """Two regions both reporting zero replicas must yield one CRITICAL event.

    The expected count is 2 and the fake checker reports two regions, and the
    alert output is expected to mention "0/1" for each of them.
    """
    wanted_replicas = 2
    per_region_counts = {
        "fake_region": {"test.main": 0},
        "fake_other_region": {"test.main": 0},
    }
    fake_checker = mock.Mock()
    fake_checker.get_replication_for_instance.return_value = {
        "fake_provider": per_region_counts
    }

    event_patch = mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    )
    with event_patch as send_event:
        monitoring_tools.check_replication_for_instance(
            instance_config=instance_config,
            expected_count=wanted_replicas,
            replication_checker=fake_checker,
            dry_run=True,
        )

        send_event.assert_called_once_with(
            instance_config=instance_config,
            status=pysensu_yelp.Status.CRITICAL,
            output=mock.ANY,
            description=mock.ANY,
            dry_run=True,
        )

        # call_args is an (args, kwargs) pair; only the keyword part matters here.
        event_kwargs = send_event.call_args[1]
        alert_output = event_kwargs["output"]
        for expected_line in (
            f"{instance_config.job_id} has 0/1 replicas in fake_region",
            f"{instance_config.job_id} has 0/1 replicas in fake_other_region",
        ):
            assert expected_line in alert_output
def test_check_replication_for_instance_ok_when_expecting_zero(
    instance_config,
):
    """An expected count of zero must produce exactly one OK event."""
    wanted_replicas = 0
    region_counts = {"test.main": 1, "test.three": 4, "test.four": 8}
    fake_checker = mock.Mock()
    fake_checker.get_replication_for_instance.return_value = {
        "fake_provider": {"fake_region": region_counts}
    }

    event_patch = mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    )
    with event_patch as send_event:
        monitoring_tools.check_replication_for_instance(
            instance_config=instance_config,
            expected_count=wanted_replicas,
            replication_checker=fake_checker,
            dry_run=True,
        )

        send_event.assert_called_once_with(
            instance_config=instance_config,
            status=pysensu_yelp.Status.OK,
            output=mock.ANY,
            description=mock.ANY,
            dry_run=True,
        )
def test_check_replication_for_instance_emits_metrics(instance_config):
    """The replication info from the checker is forwarded to the metrics emitter.

    emit_replication_metrics must be called exactly once with the checker's
    return value, the instance config, the expected count and dry_run=True.
    """
    replication_info = {
        "fake_provider": {"fake_region": {"fake_service.fake_instance": 10}}
    }
    fake_checker = mock.Mock()
    fake_checker.get_replication_for_instance.return_value = replication_info

    # send_replication_event and yelp_meteorite are patched only to keep the
    # call free of side effects; nothing is asserted on them.
    with mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    ), mock.patch(
        "paasta_tools.monitoring_tools.yelp_meteorite", autospec=True
    ), mock.patch(
        "paasta_tools.monitoring_tools.emit_replication_metrics", autospec=True
    ) as emit_metrics:
        monitoring_tools.check_replication_for_instance(
            instance_config=instance_config,
            expected_count=10,
            replication_checker=fake_checker,
            dry_run=True,
        )

        emit_metrics.assert_called_once_with(
            replication_info,
            instance_config,
            10,
            dry_run=True,
        )
def test_check_replication_for_instance_ok_with_enough_replication(
    instance_config,
):
    """Full replication (8 of 8) must yield one OK event with a 100% summary."""
    wanted_replicas = 8
    region_counts = {
        "test.canary": 1,
        "test.low_replication": 4,
        "fake_service.fake_instance": 8,
    }
    fake_checker = mock.Mock()
    fake_checker.get_replication_for_instance.return_value = {
        "fake_provider": {"fake_region": region_counts}
    }

    event_patch = mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    )
    with event_patch as send_event:
        monitoring_tools.check_replication_for_instance(
            instance_config=instance_config,
            expected_count=wanted_replicas,
            replication_checker=fake_checker,
            dry_run=True,
        )

        send_event.assert_called_once_with(
            instance_config=instance_config,
            status=pysensu_yelp.Status.OK,
            output=mock.ANY,
            description=mock.ANY,
            dry_run=True,
        )

        # call_args is an (args, kwargs) pair; only the keyword part matters here.
        event_kwargs = send_event.call_args[1]
        alert_output = event_kwargs["output"]
        expected_line = (
            "{} has 8/8 replicas in fake_region according to fake_provider (OK: 100.0%)"
        ).format(instance_config.job_id)
        assert expected_line in alert_output
def check_service_replication(
    instance_config: MarathonServiceConfig,
    all_tasks_or_pods: Sequence[MarathonTask],
    replication_checker: MesosSmartstackEnvoyReplicationChecker,
) -> Optional[bool]:
    """Checks a service's replication levels based on how the service's replication
    should be monitored. (smartstack/envoy or mesos)

    :param instance_config: an instance of MarathonServiceConfig
    :param all_tasks_or_pods: the Marathon tasks handed to the task-health check
        when the proxy-based replication check is not applicable
    :param replication_checker: an instance of MesosSmartstackEnvoyReplicationChecker
    :returns: the result of ``monitoring_tools.check_replication_for_instance``
        when the proxy-based check is used; ``None`` when the Marathon task
        check is used instead
    """
    expected_count = instance_config.get_instances()
    # Hand the values to the logger as arguments so the message is only
    # interpolated if the record is actually emitted.
    log.info(
        "Expecting %d total tasks for %s", expected_count, instance_config.job_id
    )
    proxy_port = get_proxy_port_for_instance(instance_config)

    registrations = instance_config.get_registrations()
    # if the primary registration does not match the service_instance name then
    # the best we can do is check marathon for replication (for now).
    if proxy_port is not None and registrations[0] == instance_config.job_id:
        return monitoring_tools.check_replication_for_instance(
            instance_config=instance_config,
            expected_count=expected_count,
            replication_checker=replication_checker,
        )

    check_healthy_marathon_tasks_for_service_instance(
        instance_config=instance_config,
        expected_count=expected_count,
        all_tasks=all_tasks_or_pods,
    )
    return None
def test_check_replication_for_instance_crit_when_low_replication(
    instance_config,
):
    """Half replication (4 of 8) must yield one CRITICAL event.

    The alert output has to report "4/8" for the region, and the description
    has to include the ``paasta status`` command for the instance.
    """
    wanted_replicas = 8
    region_counts = {
        "test.canary": 1,
        "fake_service.fake_instance": 4,
        "test.fully_replicated": 8,
    }
    fake_checker = mock.Mock()
    fake_checker.get_replication_for_instance.return_value = {
        "fake_provider": {"fake_region": region_counts}
    }

    event_patch = mock.patch(
        "paasta_tools.monitoring_tools.send_replication_event", autospec=True
    )
    with event_patch as send_event:
        monitoring_tools.check_replication_for_instance(
            instance_config=instance_config,
            expected_count=wanted_replicas,
            replication_checker=fake_checker,
            dry_run=True,
        )

        send_event.assert_called_once_with(
            instance_config=instance_config,
            status=pysensu_yelp.Status.CRITICAL,
            output=mock.ANY,
            description=mock.ANY,
            dry_run=True,
        )

        # call_args is an (args, kwargs) pair; only the keyword part matters here.
        event_kwargs = send_event.call_args[1]
        alert_output = event_kwargs["output"]
        assert f"{instance_config.job_id} has 4/8 replicas in fake_region" in alert_output

        status_command = "paasta status -s {} -i {} -c {} -vv".format(
            instance_config.service,
            instance_config.instance,
            instance_config.cluster,
        )
        assert status_command in event_kwargs["description"]
def check_kubernetes_pod_replication(
    instance_config: KubernetesDeploymentConfig,
    all_tasks_or_pods: Sequence[V1Pod],
    replication_checker: KubeSmartstackEnvoyReplicationChecker,
    dry_run: bool = False,
) -> Optional[bool]:
    """Checks a service's replication levels based on how the service's replication
    should be monitored. (smartstack/envoy or k8s)

    Note: this fills in ``instance_config.config_dict["monitoring"]["alert_after"]``
    in place when it is not already set.

    :param instance_config: an instance of KubernetesDeploymentConfig
    :param all_tasks_or_pods: the pods handed to the pod-health check when the
        proxy-based replication check is not applicable
    :param replication_checker: an instance of KubeSmartstackEnvoyReplicationChecker
    :param dry_run: forwarded unchanged to whichever check is performed
    :returns: the result of ``monitoring_tools.check_replication_for_instance``
        when the proxy-based check is used; ``None`` when the Kubernetes pod
        check is used instead
    """
    expected_count = instance_config.get_instances()
    # Hand the values to the logger as arguments so the message is only
    # interpolated if the record is actually emitted.
    log.info(
        "Expecting %d total tasks for %s", expected_count, instance_config.job_id
    )
    proxy_port = get_proxy_port_for_instance(instance_config)
    registrations = instance_config.get_registrations()

    # If this instance does not autoscale and only has 1 instance, default
    # alert_after to 20m. Otherwise fall back to DEFAULT_ALERT_AFTER.
    if not instance_config.is_autoscaling_enabled() and expected_count == 1:
        default_alert_after = "20m"
    else:
        default_alert_after = DEFAULT_ALERT_AFTER

    # An alert_after already present in the monitoring config always wins;
    # the default is only written when the key is missing.
    monitoring_config = instance_config.config_dict.setdefault("monitoring", {})
    monitoring_config.setdefault("alert_after", default_alert_after)

    # if the primary registration does not match the service_instance name then
    # the best we can do is check k8s for replication (for now).
    if proxy_port is not None and registrations[0] == instance_config.job_id:
        return monitoring_tools.check_replication_for_instance(
            instance_config=instance_config,
            expected_count=expected_count,
            replication_checker=replication_checker,
            dry_run=dry_run,
        )

    check_healthy_kubernetes_tasks_for_service_instance(
        instance_config=instance_config,
        expected_count=expected_count,
        all_pods=all_tasks_or_pods,
        dry_run=dry_run,
    )
    return None