def emit(self, record):
    """Run service monitoring in response to a log record.

    The first element of ``record.exc_info`` is passed to
    ``ServiceMonitor.monitor`` as ``exception``; ``None`` is passed when
    the record carries no exception info or that element is falsy.
    """
    # Imported at call time rather than module level (presumably to avoid
    # a circular import -- confirm against the module layout).
    from monitor import ServiceMonitor

    exc_info = record.exc_info
    if exc_info and exc_info[0]:
        exception = exc_info[0]
    else:
        exception = None
    ServiceMonitor.monitor(exception=exception)
########################################################################
def test_db_failover_and_recovery(self):
    """
    Tests that the slave connection falls back to the default connection
    settings while the slave is down, and that the original slave
    settings come back once the service has recovered.
    """
    # Before any outage the slave connection carries the marker.
    self.assertEqual(connections["slave"].settings_dict["MARKER"], "slave")

    # Take the slave down and let the monitor notice.
    self.simulate_service_outage(DBSlave)
    ServiceMonitor.monitor()

    # The connection is looked up again on purpose: after failover its
    # settings must no longer include the marker.
    self.assertNotIn("MARKER", connections["slave"].settings_dict)

    # Bring the slave back and let the monitor notice.
    self.simulate_service_recovery(DBSlave)
    ServiceMonitor.monitor()

    # The marker must be present again, with its original value.
    recovered_settings = connections["slave"].settings_dict
    self.assertIn("MARKER", recovered_settings)
    self.assertEqual(recovered_settings["MARKER"], "slave")
def emit(self, record):
    """Run service monitoring in response to a log record.

    The first element of ``record.exc_info`` is passed to
    ``ServiceMonitor.monitor`` as ``exception``; ``None`` is passed when
    the record carries no exception info or that element is falsy.
    """
    # Imported at call time rather than module level (presumably to avoid
    # a circular import -- confirm against the module layout).
    from monitor import ServiceMonitor

    exc_info = record.exc_info
    exception = None
    if exc_info and exc_info[0]:
        exception = exc_info[0]
    ServiceMonitor.monitor(exception=exception)
########################################################################
def process_request(self, request):
    """Run monitoring, then let outage middleware handle the request.

    After ``ServiceMonitor.monitor()`` runs, each service class in
    ``ServiceMonitor.outages`` whose ``outage_middleware_class`` defines
    ``process_request`` gets a fresh middleware instance. The first truthy
    response is returned; otherwise the result is ``None``.
    """
    ServiceMonitor.monitor()

    for outage in ServiceMonitor.outages:
        middleware_class = outage.outage_middleware_class
        # Skip outage middleware that has no request hook.
        if not hasattr(middleware_class, "process_request"):
            continue
        response = middleware_class().process_request(request)
        if response:
            return response
    return None
def test_ping_monitoring_frequency(self):
    """
    Tests how often the service is pinged during normal monitoring.
    """
    # Presumably a frequency in seconds, given the one-second sleep
    # below -- confirm against the service class.
    DBSlave.MONITORING_PING_FREQUENCY = 1

    # Several monitoring runs in quick succession yield a single ping.
    for _ in range(3):
        ServiceMonitor.monitor()
    ping_count = len(DBSlave.pings)
    self.assertEqual(ping_count, 1)

    # After waiting, the next monitoring run pings a second time.
    time.sleep(1)
    ServiceMonitor.monitor()
    ping_count = len(DBSlave.pings)
    self.assertEqual(ping_count, 2)
def test_ping_error_frequency(self):
    """
    Tests how often the service is pinged when an error is handed to
    the ServiceMonitor.
    """
    attempts = 3

    self.simulate_service_outage(DBSlave)

    # Only the error frequency is 0, so if every run below produces a
    # ping it is the error frequency that was applied.
    DBSlave.MONITORING_PING_FREQUENCY = DBSlave.OUTAGE_PING_FREQUENCY = 3
    DBSlave.ERROR_PING_FREQUENCY = 0

    for _ in range(attempts):
        ServiceMonitor.monitor(exception=socket.error())
    self.assertEqual(len(DBSlave.pings), attempts)

    # Bring the service back and run the monitor once more.
    self.simulate_service_recovery(DBSlave)
    ServiceMonitor.monitor()
def test_ping_outage_frequency(self):
    """
    Tests how often the service is pinged while it is in an outage.
    """
    attempts = 3

    self.simulate_service_outage(DBSlave)

    # The outage frequency is 0 while the monitoring frequency is not,
    # so if every run below produces a ping it is the outage frequency
    # that was applied.
    DBSlave.MONITORING_PING_FREQUENCY = 3
    DBSlave.OUTAGE_PING_FREQUENCY = 0

    for _ in range(attempts):
        ServiceMonitor.monitor()
    self.assertEqual(len(DBSlave.pings), attempts)

    # Bring the service back and run the monitor once more.
    self.simulate_service_recovery(DBSlave)
    ServiceMonitor.monitor()
def test_ping_error_frequency(self):
    """
    Tests how often the service is pinged when an error is handed to
    the ServiceMonitor.
    """
    attempts = 3

    self.simulate_service_outage(DBSlave)

    # Only the error frequency is 0, so if every run below produces a
    # ping it is the error frequency that was applied.
    DBSlave.MONITORING_PING_FREQUENCY = DBSlave.OUTAGE_PING_FREQUENCY = 3
    DBSlave.ERROR_PING_FREQUENCY = 0

    for _ in range(attempts):
        ServiceMonitor.monitor(exception=socket.error())
    self.assertEqual(len(DBSlave.pings), attempts)

    # Bring the service back and run the monitor once more.
    self.simulate_service_recovery(DBSlave)
    ServiceMonitor.monitor()
def test_celery_failover_and_recovery(self):
    """
    Tests that CELERY_ALWAYS_EAGER is switched on when the broker goes
    down, and switched back to False once the service has recovered.
    """
    # Starting point: eager mode is off.
    self.assertFalse(settings.CELERY_ALWAYS_EAGER)

    # Outage: after a monitoring run, eager mode must be on.
    self.simulate_service_outage(Celery)
    ServiceMonitor.monitor()
    self.assertTrue(settings.CELERY_ALWAYS_EAGER)

    # Recovery: after a monitoring run, eager mode must be off again.
    self.simulate_service_recovery(Celery)
    ServiceMonitor.monitor()
    self.assertFalse(settings.CELERY_ALWAYS_EAGER)
def test_memcached_log_outage_and_recovery(self):
    """Tests that a memcached outage is logged when it is discovered
    and periodically afterwards, and that the recovery is logged too.

    Memcached doesn't require any failover, so the outage and recovery
    notifications are the observable behaviour to check.
    """
    saved_frequency = ServiceMonitor.OUTAGE_LOGGING_FREQUENCY
    ServiceMonitor.OUTAGE_LOGGING_FREQUENCY = 1
    try:
        # Take memcached down; repeated monitoring runs in quick
        # succession must log the outage exactly once.
        self.simulate_service_outage(Memcached)
        for _ in range(3):
            ServiceMonitor.monitor()

        self.assertEqual(len(LogCaptureHandler.records), 1)
        outage_record = LogCaptureHandler.records[0]
        self.assertEqual(outage_record.levelno, logging.CRITICAL)
        self.assertIn(
            "{0} outage. Failover initiated.".format(Memcached.name),
            outage_record.msg)

        # Wait long enough for the outage to be logged a second time.
        time.sleep(1)
        ServiceMonitor.monitor()

        self.assertEqual(len(LogCaptureHandler.records), 2)
        repeat_record = LogCaptureHandler.records[1]
        self.assertEqual(repeat_record.levelno, logging.CRITICAL)

        # Bring memcached back; the recovery is logged at INFO level.
        self.simulate_service_recovery(Memcached)
        ServiceMonitor.monitor()

        self.assertEqual(len(LogCaptureHandler.records), 3)
        recovery_record = LogCaptureHandler.records[2]
        self.assertEqual(recovery_record.levelno, logging.INFO)
        self.assertIn(
            "{0} is back up. Recovery complete.".format(Memcached.name),
            recovery_record.msg)
    finally:
        # Always put the class-level logging frequency back.
        ServiceMonitor.OUTAGE_LOGGING_FREQUENCY = saved_frequency
def setUpDBSlave(self):
    """Register DBSlave and add a marked "slave" database entry.

    A shallow copy of ``settings.DATABASES`` is saved on the test
    instance first. The "slave" entry is a copy of the "default" entry
    with an extra ``MARKER`` key so the two can be told apart.
    """
    ServiceMonitor.register(DBSlave)

    # Saved before the "slave" entry is added.
    self._orig_db_settings = settings.DATABASES.copy()

    slave_settings = settings.DATABASES["default"].copy()
    settings.DATABASES["slave"] = slave_settings
    slave_settings["MARKER"] = "slave"

    DBSlave.reload_settings()
def setUpMemcached(self):
    """Register the Memcached service class with the ServiceMonitor."""
    ServiceMonitor.register(Memcached)
def setUpCelery(self):
    """Register Celery and force ``CELERY_ALWAYS_EAGER`` to False.

    The previous value of the setting (``False`` when it is not defined)
    is saved on the test instance.
    """
    ServiceMonitor.register(Celery)

    previous_eager = getattr(settings, "CELERY_ALWAYS_EAGER", False)
    self._orig_celery_always_eager = previous_eager
    settings.CELERY_ALWAYS_EAGER = False
def wrapper(*args, **kwargs):
    # Run the monitor with outages_only=True before delegating
    # (presumably this restricts the check to services already in
    # outage -- confirm against ServiceMonitor.monitor).
    ServiceMonitor.monitor(outages_only=True)
    # Call the wrapped callable with the caller's arguments unchanged.
    result = function(*args, **kwargs)
    return result
class FailoverTestCase(TestCase):
    """Tests failover and recovery of services registered with the
    ServiceMonitor, using patched ``ping`` methods to simulate outages.
    """

    ####################################################################

    def setUp(self):
        """Adds a slave connection to settings.DATABASES, which uses the
        same settings as the default connection but includes a marker to
        distinguish it. Registers the test service class.
        """
        # Take a snapshot *before* clearing. Keeping only a reference
        # would leave orig_services pointing at the container that is
        # emptied here and refilled with the test services, so tearDown
        # could never restore the original registrations.
        self.orig_services = ServiceMonitor.services.copy()
        ServiceMonitor.services.clear()

        self.setUpDBSlave()
        if test_memcached:
            self.setUpMemcached()
        if test_celery:
            self.setUpCelery()

        for service_class in ServiceMonitor.services:
            self.patch_ping(service_class)

            # Set ping frequencies to 0 (ping every time)
            service_class._orig_monitoring_frequency = service_class.MONITORING_PING_FREQUENCY
            service_class._orig_outage_frequency = service_class.OUTAGE_PING_FREQUENCY
            service_class._orig_error_frequency = service_class.ERROR_PING_FREQUENCY
            service_class.MONITORING_PING_FREQUENCY = 0
            service_class.OUTAGE_PING_FREQUENCY = 0
            service_class.ERROR_PING_FREQUENCY = 0

        # Register socket.error as an exception class that should trigger
        # monitoring.
        self.orig_failover_exception_classes = failover_settings.OUTAGE_EXCEPTION_CLASSES
        failover_settings.OUTAGE_EXCEPTION_CLASSES = (socket.error, )

        # Set up a logger tied to the FailoverHandler.
        self.logger = logging.getLogger("failover_test")
        self.logger.setLevel(logging.ERROR)
        self.log_handler = FailoverHandler()
        self.logger.addHandler(self.log_handler)

        # Capture what the monitor itself logs so tests can inspect it.
        self.log_capture_handler = LogCaptureHandler()
        monitor_logger.addHandler(self.log_capture_handler)
        self.orig_monitor_logger_level = monitor_logger.level
        monitor_logger.setLevel(logging.INFO)
        LogCaptureHandler.records = []

    ####################################################################

    def setUpDBSlave(self):
        """Registers DBSlave and adds a "slave" database entry that copies
        the "default" entry and adds a MARKER key.
        """
        ServiceMonitor.register(DBSlave)
        self._orig_db_settings = settings.DATABASES.copy()
        settings.DATABASES["slave"] = settings.DATABASES["default"].copy()
        settings.DATABASES["slave"]["MARKER"] = "slave"
        DBSlave.reload_settings()

    ####################################################################

    def setUpCelery(self):
        """Registers Celery, remembers the current CELERY_ALWAYS_EAGER
        value (False when undefined) and sets the setting to False.
        """
        ServiceMonitor.register(Celery)
        self._orig_celery_always_eager = getattr(settings, "CELERY_ALWAYS_EAGER", False)
        settings.CELERY_ALWAYS_EAGER = False

    ####################################################################

    def setUpMemcached(self):
        """Registers the Memcached service class."""
        ServiceMonitor.register(Memcached)

    ####################################################################

    def tearDown(self):
        """Undoes everything setUp changed: ping patches, ping
        frequencies, settings, registered services and logging handlers.
        """
        for service_class in ServiceMonitor.services:
            service_class.ping = service_class._orig_ping
            delattr(service_class, "_orig_ping")
            delattr(service_class, "pings")
            service_class.MONITORING_PING_FREQUENCY = service_class._orig_monitoring_frequency
            service_class.OUTAGE_PING_FREQUENCY = service_class._orig_outage_frequency
            service_class.ERROR_PING_FREQUENCY = service_class._orig_error_frequency
            delattr(service_class, "_orig_monitoring_frequency")
            delattr(service_class, "_orig_outage_frequency")
            delattr(service_class, "_orig_error_frequency")

            # Clear the last_ping from each service class so as not to
            # impact the next test.
            service_class.last_ping = None

        self.tearDownDBSlave()
        if test_memcached:
            self.tearDownMemcached()
        if test_celery:
            self.tearDownCelery()

        # Restore the registrations in place from the snapshot taken in
        # setUp, so the container keeps its identity for anything else
        # holding a reference to it.
        ServiceMonitor.services.clear()
        ServiceMonitor.services.update(self.orig_services)

        failover_settings.OUTAGE_EXCEPTION_CLASSES = self.orig_failover_exception_classes
        self.logger.removeHandler(self.log_handler)
        monitor_logger.removeHandler(self.log_capture_handler)
        monitor_logger.setLevel(self.orig_monitor_logger_level)
        LogCaptureHandler.records = []

    ####################################################################

    def tearDownDBSlave(self):
        """Puts back the DATABASES setting saved by setUpDBSlave."""
        settings.DATABASES = self._orig_db_settings

    ####################################################################

    def tearDownCelery(self):
        """Puts back the CELERY_ALWAYS_EAGER value saved by setUpCelery."""
        settings.CELERY_ALWAYS_EAGER = self._orig_celery_always_eager

    ####################################################################

    def tearDownMemcached(self):
        """Nothing to undo for Memcached."""
        pass

    ####################################################################

    def patch_ping(self, service_class):
        """Patches the ping method to store the datetime of each ping.
        """
        orig_ping = service_class.ping

        def patched_ping(*args, **kwargs):
            service_class.pings.append(datetime.datetime.now())
            return orig_ping(*args, **kwargs)

        # Remember the unpatched ping only once, so that repeated
        # patching never overwrites it with an already-patched version.
        if not hasattr(service_class, '_orig_ping'):
            service_class._orig_ping = service_class.ping
        service_class.pings = []
        service_class.ping = patched_ping

    ####################################################################

    def simulate_service_outage(self, service_class):
        """Patches the service class ping method to raise a socket error.

        The current ping is still called first, so a ping already
        patched by patch_ping keeps recording its timestamps.
        """
        orig_ping = service_class.ping

        def error_ping(*args, **kwargs):
            orig_ping(*args, **kwargs)
            raise socket.error()

        if not hasattr(service_class, '_orig_ping'):
            service_class._orig_ping = service_class.ping
        service_class.ping = error_ping

    ####################################################################

    def simulate_service_recovery(self, service_class):
        """Restores the original ping method of the service class.
        """
        service_class.ping = service_class._orig_ping

    ####################################################################

    def test_db_failover_and_recovery(self):
        """
        Tests that the slave fails over to the default connection settings
        when the slave goes down. Then tests that the original connection
        is restored once the service comes back up.
        """
        # Assert the presence of the marker in the slave connection
        connection = connections["slave"]
        self.assertEqual(connection.settings_dict["MARKER"], "slave")

        # Simulate the outage and run the monitoring
        self.simulate_service_outage(DBSlave)
        ServiceMonitor.monitor()

        # Verify the results
        connection = connections["slave"]
        self.assertNotIn("MARKER", connection.settings_dict)

        # Simulate recovery
        self.simulate_service_recovery(DBSlave)
        ServiceMonitor.monitor()

        # Verify the results
        connection = connections["slave"]
        self.assertIn("MARKER", connection.settings_dict)
        self.assertEqual(connection.settings_dict["MARKER"], "slave")

    ####################################################################

    @skipUnless(test_celery, "Not using celery")
    def test_celery_failover_and_recovery(self):
        """
        Tests that celery fails over to ALWAYS_EAGER when the broker goes
        down. Then tests that ALWAYS_EAGER is restored to False once the
        service comes back up.
        """
        # Assert the initial settings
        self.assertFalse(settings.CELERY_ALWAYS_EAGER)

        # Simulate the outage and run the monitoring
        self.simulate_service_outage(Celery)
        ServiceMonitor.monitor()

        # Verify the results
        self.assertTrue(settings.CELERY_ALWAYS_EAGER)

        # Simulate recovery
        self.simulate_service_recovery(Celery)
        ServiceMonitor.monitor()

        # Verify the results
        self.assertFalse(settings.CELERY_ALWAYS_EAGER)

    ####################################################################

    def test_exception_logging_failover(self):
        """
        Tests that the slave fails over to the default connection settings
        when the slave goes down and a suspicious exception triggers
        monitoring.
        """
        # Assert the presence of the marker in the slave connection
        connection = connections["slave"]
        self.assertEqual(connection.settings_dict["MARKER"], "slave")

        # Simulate the outage
        self.simulate_service_outage(DBSlave)

        # Raise an error that triggers monitoring. Logging it with
        # exc_info goes through the FailoverHandler attached in setUp.
        try:
            raise socket.error()
        except Exception as e:
            self.logger.error(e, exc_info=e)

        # Verify the results
        connection = connections["slave"]
        self.assertNotIn("MARKER", connection.settings_dict)

        # Simulate recovery
        self.simulate_service_recovery(DBSlave)
        ServiceMonitor.monitor()

        # Verify the results
        connection = connections["slave"]
        self.assertIn("MARKER", connection.settings_dict)
        self.assertEqual(connection.settings_dict["MARKER"], "slave")
def wrapper(*args, **kwargs):
    # Run service monitoring before delegating to the wrapped callable.
    ServiceMonitor.monitor()
    # Call the wrapped callable with the caller's arguments unchanged.
    result = function(*args, **kwargs)
    return result