def configure(self, hosts):
    """(Re)configure the leaf scheduler with a new list of children.

    Each entry of ``hosts`` is converted with ``ChildInfo.from_thrift`` and
    stored in ``self._hosts``. Any existing health checker is stopped, and
    a new one is started when health checking is enabled.

    :param hosts: list of child hosts
    :type hosts: list (each element is passed to ChildInfo.from_thrift)
    """
    # Start from an empty list and fill it as each child is converted.
    self._hosts = []
    self._hosts.extend(ChildInfo.from_thrift(entry) for entry in hosts)

    self._coalesce_resources(self._hosts)

    # Stop the previous checker (if any) before building a replacement.
    if self._health_checker:
        self._health_checker.stop()

    if self._enable_health_checker:
        # Initialize the health checker with the new set of children,
        # keyed by child id.
        agent_config = common.services.get(ServiceName.AGENT_CONFIG)
        child_addresses = {}
        for child in self._hosts:
            child_addresses[child.id] = ServerAddress(child.address,
                                                      child.port)
        checker = HealthChecker(self._scheduler_id, child_addresses,
                                agent_config)
        self._health_checker = checker
        checker.start()

    self._configured = ConfigStates.INITIALIZED
def test_report(self, client_class, time_class, chairman):
    """Test that resurrected and missing hosts get reported correctly.

    Walks two children ("bar" and "baz") through these phases:
    both answer the first ping, nothing changes, "bar" stops answering,
    and "bar" answers again. After each phase the calls made on the
    mocked chairman are checked.

    :param client_class: mock whose side effect hands out the fake clients
    :param time_class: mock whose return value is the current time
    :param chairman: mock whose return value receives the reports
    """
    client_class.side_effect = self.create_fake_client
    chairman.return_value.report_resurrected.return_value = \
        ReportResurrectedResponse(result=0)
    bar_client = MagicMock()
    baz_client = MagicMock()
    self._clients["bar"] = bar_client
    self._clients["baz"] = baz_client
    children = {"bar": ServerAddress("bar", 1234),
                "baz": ServerAddress("baz", 1234)}

    # First ping succeeds for bar and baz. They get reported resurrected.
    health_checker = HealthChecker("id", children, self.conf)
    time_class.return_value = 0.0
    health_checker._send_heartbeat()
    health_checker._send_report()
    req = ReportResurrectedRequest(hosts=['bar', 'baz'],
                                   schedulers=None,
                                   scheduler_id='id')
    chairman.return_value.report_resurrected.assert_called_once_with(req)
    self.assertFalse(chairman.return_value.report_missing.called)
    self.assertEqual(health_checker._resurrected_children,
                     set(["bar", "baz"]))
    self.assertEqual(health_checker._missing_children, set())

    # Call _send_report again. This time nothing should get reported.
    chairman.reset_mock()
    health_checker._send_report()
    self.assertFalse(chairman.return_value.report_missing.called)
    self.assertFalse(chairman.return_value.report_resurrected.called)

    # bar goes missing: its ping fails, and the clock is advanced only
    # after the heartbeat has been sent.
    bar_client.ping.side_effect = Exception()
    health_checker._send_heartbeat()
    time_class.return_value = 100.0
    chairman.return_value.report_missing.return_value = \
        ReportMissingResponse(result=0)
    health_checker._send_report()
    req = ReportMissingRequest(hosts=['bar'],
                               schedulers=None,
                               scheduler_id='id')
    chairman.return_value.report_missing.assert_called_once_with(req)
    self.assertFalse(chairman.return_value.report_resurrected.called)

    # bar comes back: clear the ping failure and heartbeat again.
    chairman.reset_mock()
    bar_client.ping.side_effect = None
    time_class.return_value = 200.0
    health_checker._send_heartbeat()
    chairman.return_value.report_resurrected.return_value = \
        ReportResurrectedResponse(result=0)
    health_checker._send_report()
    req = ReportResurrectedRequest(hosts=['bar'],
                                   schedulers=None,
                                   scheduler_id='id')
    self.assertFalse(chairman.return_value.report_missing.called)
    chairman.return_value.report_resurrected.assert_called_once_with(req)
def test_stop(self):
    """Make sure start() starts threads and stop() stops them."""
    checker = HealthChecker("id", {}, self.conf)

    def check_threads(assertion):
        # Look the attributes up on every call rather than caching the
        # thread objects, so each check sees the checker's current threads.
        for attr_name in ("_heartbeater", "_reporter"):
            assertion(getattr(checker, attr_name).is_alive())

    # Nothing is running before start().
    check_threads(self.assertFalse)

    checker.start()
    check_threads(self.assertTrue)

    checker.stop()
    check_threads(self.assertFalse)
def test_heartbeat(self, client_class, time_class):
    """Test that sequence number and timestamp get updated correctly
    after sending heartbeat.

    ``_last_update`` is expected to map each child id to a
    ``(sequence number, timestamp)`` tuple. The entry should change only
    for children whose ping succeeded, while ``_seqnum`` should increase
    on every heartbeat.

    :param client_class: mock whose side effect hands out the fake clients
    :param time_class: mock whose return value is the current time
    """
    client_class.side_effect = self.create_fake_client
    bar_client = MagicMock()
    baz_client = MagicMock()
    self._clients["bar"] = bar_client
    self._clients["baz"] = baz_client
    children = {"bar": ServerAddress("bar", 1234),
                "baz": ServerAddress("baz", 1234)}

    # Make sure things are initialized properly.
    time_class.return_value = 0.0
    health_checker = HealthChecker("id", children, self.conf)
    self.assertEqual(health_checker._seqnum, 0)
    self.assertEqual(len(health_checker._last_update), 2)
    self.assertEqual(health_checker._last_update["bar"], (0, 0.0))
    self.assertEqual(health_checker._last_update["baz"], (0, 0.0))

    # Send a ping; both children answer.
    time_class.return_value = 10.0
    health_checker._send_heartbeat()
    self.assertEqual(health_checker._seqnum, 1)
    self.assertEqual(len(health_checker._last_update), 2)
    self.assertEqual(health_checker._last_update["bar"], (1, 10.0))
    self.assertEqual(health_checker._last_update["baz"], (1, 10.0))

    # Send another ping. Ping to baz fails, so baz keeps its old entry.
    time_class.return_value = 20.0
    baz_client.ping.side_effect = Exception()
    health_checker._send_heartbeat()
    self.assertEqual(health_checker._seqnum, 2)
    self.assertEqual(len(health_checker._last_update), 2)
    self.assertEqual(health_checker._last_update["bar"], (2, 20.0))
    self.assertEqual(health_checker._last_update["baz"], (1, 10.0))

    # Send another ping. Ping to bar fails while baz recovers.
    time_class.return_value = 30.0
    bar_client.ping.side_effect = Exception()
    baz_client.ping.side_effect = None
    health_checker._send_heartbeat()
    self.assertEqual(health_checker._seqnum, 3)
    self.assertEqual(len(health_checker._last_update), 2)
    self.assertEqual(health_checker._last_update["bar"], (2, 20.0))
    self.assertEqual(health_checker._last_update["baz"], (3, 30.0))
def test_slow_heartbeater(self, client_class, time_class, chairman):
    """Don't report missing if the current sequence number is equal to
    the sequence number of the last successful ping.

    :param client_class: mock whose side effect hands out the fake clients
    :param time_class: mock whose return value is the current time
    :param chairman: mock whose return value receives the reports
    """
    client_class.side_effect = self.create_fake_client
    bar_client = MagicMock()
    self._clients["bar"] = bar_client
    children = {"bar": ServerAddress("bar", 1234)}

    # Send a ping. bar should get reported resurrected.
    health_checker = HealthChecker("id", children, self.conf)
    time_class.return_value = 0.0
    chairman.return_value.report_resurrected.return_value = \
        ReportResurrectedResponse(result=0)
    health_checker._send_heartbeat()
    health_checker._send_report()
    req = ReportResurrectedRequest(hosts=['bar'],
                                   schedulers=None,
                                   scheduler_id='id')
    chairman.return_value.report_resurrected.assert_called_once_with(req)
    self.assertFalse(chairman.return_value.report_missing.called)
    self.assertEqual(health_checker._resurrected_children, set(["bar"]))
    self.assertEqual(health_checker._missing_children, set())

    # Call _send_report() again after 100 seconds. bar shouldn't get
    # reported missing since the heartbeater hasn't sent another ping.
    time_class.return_value = 100.0
    chairman.reset_mock()
    health_checker._send_report()
    self.assertFalse(chairman.return_value.report_missing.called)
    self.assertFalse(chairman.return_value.report_resurrected.called)

    # Ping fails. Now the reporter should report bar missing.
    bar_client.ping.side_effect = Exception()
    chairman.return_value.report_missing.return_value = \
        ReportMissingResponse(result=0)
    health_checker._send_heartbeat()
    health_checker._send_report()
    req = ReportMissingRequest(hosts=['bar'],
                               schedulers=None,
                               scheduler_id='id')
    chairman.return_value.report_missing.assert_called_once_with(req)
    self.assertFalse(chairman.return_value.report_resurrected.called)
def test_chairman_failure(self, client_class, time_class, chairman):
    """Reporter should retry reporting if chairman fails."""
    client_class.side_effect = self.create_fake_client
    fake_bar = MagicMock()
    self._clients["bar"] = fake_bar
    children = {"bar": ServerAddress("bar", 1234)}

    # Short names for the mocked chairman endpoints. reset_mock() clears
    # recorded calls but keeps these child mock objects in place.
    chairman_client = chairman.return_value
    report_resurrected = chairman_client.report_resurrected
    report_missing = chairman_client.report_missing

    def assert_resurrected_reported_once(expected_request):
        report_resurrected.assert_called_once_with(expected_request)
        self.assertFalse(report_missing.called)

    # First attempt: report_resurrected returns a non-zero value.
    checker = HealthChecker("id", children, self.conf)
    time_class.return_value = 0.0
    checker._send_heartbeat()
    report_resurrected.return_value = ReportResurrectedResponse(result=1)
    checker._send_report()
    req = ReportResurrectedRequest(hosts=['bar'],
                                   schedulers=None,
                                   scheduler_id='id')
    assert_resurrected_reported_once(req)

    # Second attempt: report_resurrected throws an exception.
    chairman.reset_mock()
    report_resurrected.side_effect = Exception()
    checker._send_report()
    assert_resurrected_reported_once(req)

    # Third attempt: the report succeeds.
    chairman.reset_mock()
    report_resurrected.side_effect = None
    report_resurrected.return_value = ReportResurrectedResponse(result=0)
    checker._send_report()
    assert_resurrected_reported_once(req)

    # After a successful report, nothing gets reported anymore.
    chairman.reset_mock()
    checker._send_report()
    self.assertFalse(report_resurrected.called)
    self.assertFalse(report_missing.called)