Exemple #1
0
    def configure(self, hosts):
        """Configure the leaf scheduler with a new set of child hosts.

        Replaces the current host list, hands it to _coalesce_resources and,
        when health checking is enabled, replaces the health checker with one
        created for the new children.

        :param hosts: child hosts in thrift form; every entry is converted
                      with ChildInfo.from_thrift
        :type hosts: list
        """
        # Convert every entry before rebinding self._hosts, so a conversion
        # that raises cannot leave a half-populated host list behind.
        self._hosts = [ChildInfo.from_thrift(host) for host in hosts]

        self._coalesce_resources(self._hosts)

        # A checker that is already present was created for the previous
        # children; stop it before (possibly) creating a new one.
        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # initialize health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id, children,
                                                 agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED
 def test_stop(self):
     """Worker threads are alive only between start() and stop()."""
     checker = HealthChecker("id", {}, self.conf)

     def expect_threads(check):
         # Look the attributes up on every call instead of caching them,
         # exactly as a direct assertion on checker._heartbeater would.
         check(checker._heartbeater.is_alive())
         check(checker._reporter.is_alive())

     # Constructing the checker must not start anything.
     expect_threads(self.assertFalse)

     checker.start()
     expect_threads(self.assertTrue)

     checker.stop()
     expect_threads(self.assertFalse)
    def configure(self, hosts):
        """Configure the leaf scheduler with a new set of child hosts.

        Replaces the current host list, hands it to _coalesce_resources and,
        when health checking is enabled, replaces the health checker with one
        created for the new children.

        :param hosts: child hosts in thrift form; every entry is converted
                      with ChildInfo.from_thrift
        :type hosts: list
        """
        # Convert every entry before rebinding self._hosts, so a conversion
        # that raises cannot leave a half-populated host list behind.
        self._hosts = [ChildInfo.from_thrift(host) for host in hosts]

        self._coalesce_resources(self._hosts)

        # A checker that is already present was created for the previous
        # children; stop it before (possibly) creating a new one.
        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # initialize health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id, children,
                                                 agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED
Exemple #4
0
    def test_heartbeat(self, client_class, time_class):
        """Test that sequence number and timestamp get updated correctly
           after sending heartbeat.

        ``_seqnum`` is expected to grow with every heartbeat, while a child's
        ``_last_update`` entry (a ``(seqnum, timestamp)`` pair) only moves
        forward when the ping to that child succeeds.
        """
        client_class.side_effect = self.create_fake_client
        bar_client = MagicMock()
        baz_client = MagicMock()
        self._clients["bar"] = bar_client
        self._clients["baz"] = baz_client

        children = {
            "bar": ServerAddress("bar", 1234),
            "baz": ServerAddress("baz", 1234)
        }

        # make sure things are initialized properly
        time_class.return_value = 0.0
        health_checker = HealthChecker("id", children, self.conf)
        self.assertEqual(health_checker._seqnum, 0)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (0, 0.0))
        self.assertEqual(health_checker._last_update["baz"], (0, 0.0))

        # send a ping
        time_class.return_value = 10.0
        health_checker._send_heartbeat()
        self.assertEqual(health_checker._seqnum, 1)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (1, 10.0))
        self.assertEqual(health_checker._last_update["baz"], (1, 10.0))

        # send another ping. ping to baz fails, so only bar's entry advances.
        time_class.return_value = 20.0
        baz_client.ping.side_effect = Exception()
        health_checker._send_heartbeat()
        self.assertEqual(health_checker._seqnum, 2)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (2, 20.0))
        self.assertEqual(health_checker._last_update["baz"], (1, 10.0))

        # send another ping. ping to bar fails, baz recovers.
        time_class.return_value = 30.0
        bar_client.ping.side_effect = Exception()
        baz_client.ping.side_effect = None
        health_checker._send_heartbeat()
        self.assertEqual(health_checker._seqnum, 3)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (2, 20.0))
        self.assertEqual(health_checker._last_update["baz"], (3, 30.0))
Exemple #5
0
 def test_stop(self):
     """Worker threads are alive only between start() and stop()."""
     checker = HealthChecker("id", {}, self.conf)

     def expect_threads(check):
         # Look the attributes up on every call instead of caching them,
         # exactly as a direct assertion on checker._heartbeater would.
         check(checker._heartbeater.is_alive())
         check(checker._reporter.is_alive())

     # Constructing the checker must not start anything.
     expect_threads(self.assertFalse)

     checker.start()
     expect_threads(self.assertTrue)

     checker.stop()
     expect_threads(self.assertFalse)
    def test_heartbeat(self, client_class, time_class):
        """Test that sequence number and timestamp get updated correctly
           after sending heartbeat.

        ``_seqnum`` is expected to grow with every heartbeat, while a child's
        ``_last_update`` entry (a ``(seqnum, timestamp)`` pair) only moves
        forward when the ping to that child succeeds.
        """
        client_class.side_effect = self.create_fake_client
        bar_client = MagicMock()
        baz_client = MagicMock()
        self._clients["bar"] = bar_client
        self._clients["baz"] = baz_client

        children = {"bar": ServerAddress("bar", 1234),
                    "baz": ServerAddress("baz", 1234)}

        # make sure things are initialized properly
        time_class.return_value = 0.0
        health_checker = HealthChecker("id", children, self.conf)
        self.assertEqual(health_checker._seqnum, 0)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (0, 0.0))
        self.assertEqual(health_checker._last_update["baz"], (0, 0.0))

        # send a ping
        time_class.return_value = 10.0
        health_checker._send_heartbeat()
        self.assertEqual(health_checker._seqnum, 1)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (1, 10.0))
        self.assertEqual(health_checker._last_update["baz"], (1, 10.0))

        # send another ping. ping to baz fails, so only bar's entry advances.
        time_class.return_value = 20.0
        baz_client.ping.side_effect = Exception()
        health_checker._send_heartbeat()
        self.assertEqual(health_checker._seqnum, 2)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (2, 20.0))
        self.assertEqual(health_checker._last_update["baz"], (1, 10.0))

        # send another ping. ping to bar fails, baz recovers.
        time_class.return_value = 30.0
        bar_client.ping.side_effect = Exception()
        baz_client.ping.side_effect = None
        health_checker._send_heartbeat()
        self.assertEqual(health_checker._seqnum, 3)
        self.assertEqual(len(health_checker._last_update), 2)
        self.assertEqual(health_checker._last_update["bar"], (2, 20.0))
        self.assertEqual(health_checker._last_update["baz"], (3, 30.0))
Exemple #7
0
class LeafScheduler(BaseScheduler):
    """Leaf scheduler manages child hosts.

    find() queries every configured child host through the thread pool;
    place() asks a selected subset of hosts and returns the best-scored
    answer. Both refuse to run until configure() has been called.
    """
    def __init__(self, scheduler_id, ut_ratio, enable_health_checker=True):
        """Create a new leaf scheduler.

        :param scheduler_id: scheduler id
        :type scheduler_id: str
        :param ut_ratio: value handed to DefaultScorer
        :param enable_health_checker: enables health checking of children.
        :type enable_health_checker: bool
        """
        self._logger = logging.getLogger(__name__)
        # Let the logging module do the interpolation (lazy %-args).
        self._logger.info("Creating leaf scheduler: %s", scheduler_id)
        self.lock = threading.RLock()
        self._latch = CountUpDownLatch()
        self._place_strategy = RandomSubsetStrategy(PLACE_FAN_OUT_RATIO,
                                                    MIN_PLACE_FAN_OUT,
                                                    MAX_PLACE_FAN_OUT)
        self._scheduler_id = scheduler_id
        self._hosts = []
        self._scorer = DefaultScorer(ut_ratio)
        self._threadpool = None
        self._initialize_services(scheduler_id)
        self._health_checker = None
        self._enable_health_checker = enable_health_checker
        self._configured = ConfigStates.UNINITIALIZED

    def _initialize_services(self, scheduler_id):
        """Look up the shared thread pool and create the scheduler client.

        :param scheduler_id: scheduler id (not used by this implementation)
        """
        self._threadpool = common.services.get(ThreadPoolExecutor)
        self._scheduler_client = SchedulerClient()

    @locked
    def configure(self, hosts):
        """Configure the leaf scheduler with a new set of child hosts.

        Replaces the current host list, hands it to _coalesce_resources and,
        when health checking is enabled, replaces the health checker with one
        created for the new children.

        :param hosts: child hosts in thrift form; every entry is converted
                      with ChildInfo.from_thrift
        :type hosts: list
        """
        # Convert every entry before rebinding self._hosts, so a conversion
        # that raises cannot leave a half-populated host list behind.
        self._hosts = [ChildInfo.from_thrift(host) for host in hosts]

        self._coalesce_resources(self._hosts)

        # A checker that is already present was created for the previous
        # children; stop it before (possibly) creating a new one.
        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # initialize health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id, children,
                                                 agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED

    @locked
    def _get_hosts(self):
        """
        Get the list of hosts for this scheduler.
        The returned list is a shallow copy of the host list (a new list
        holding the same host objects), so a subsequent call to configure()
        does not change the list seen by calls in flight.
        Assumes the host is configured as a leaf scheduler.
        :rtype: list of the objects produced by ChildInfo.from_thrift
        """
        return list(self._hosts)

    def mark_pending(func):
        """
        Decorator for bumping up the pending count for calls that are inflight.

        The latch is counted up before ``func`` runs and counted down in a
        ``finally`` clause, so the count drops again even when ``func``
        raises.
        """
        # NOTE(review): log_level receives the logging.debug function, not
        # the logging.DEBUG constant - confirm this is what log_request
        # expects.
        @log_request(log_level=logging.debug)
        def nested(self, *args, **kwargs):
            self._latch.count_up()
            self._logger.debug("latch counted up to: {0}".format(
                self._latch.count))
            try:
                return func(self, *args, **kwargs)
            finally:
                self._latch.count_down()
                self._logger.debug("latch counted down to: {0}".format(
                    self._latch.count))

        return nested

    @mark_pending
    def find(self, request):
        """Find the specified resource.

        Submits one _find_worker call per configured host and waits up to
        FIND_TIMEOUT for the answers. The first completed answer whose result
        is FindResultCode.OK is returned; otherwise NOT_FOUND is returned.

        :type request: FindRequest
        :rtype: FindResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        # Host service only has a single scheduler
        request.scheduler_id = None

        futures = []
        for agent in self._get_hosts():
            future = self._threadpool.submit(self._find_worker, agent.address,
                                             agent.port, agent.id, request)
            futures.append(future)

        done, not_done = concurrent.futures.wait(futures, timeout=FIND_TIMEOUT)
        self._logger.info("Find responses received: %d, timed out: %d",
                          len(done), len(not_done))

        # NOTE(review): unlike place(), an exception raised by a worker
        # propagates out of future.result() here - confirm this is intended.
        for future in done:
            response = future.result()
            if response.result == FindResultCode.OK:
                return response

        return FindResponse(FindResultCode.NOT_FOUND)

    @mark_pending
    def place(self, request):
        """Place the specified resources.

        Returns the response chosen by the scorer among all OK answers. When
        there is none, the failure reported by the hosts is returned with
        this precedence: not enough CPU, not enough memory, not enough
        datastore capacity, no such resource; SYSTEM_ERROR if no host
        reported any of these.

        :type request: PlaceRequest
        :rtype: PlaceResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        request.scheduler_id = None

        constraints = self._collect_constraints(request.resource)
        selected = self._placement_hosts(request, constraints)
        if len(selected) == 0:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)

        selected = self._filter_missing_hosts(selected)
        done = self._execute_placement(selected, request)

        responses = []
        no_such_resource = False
        not_enough_memory_resource = False
        not_enough_cpu_resource = False
        not_enough_datastore_capacity = False

        for future in done:
            # A failing host must not abort the whole placement: log it and
            # keep looking at the remaining answers.
            try:
                response = future.result()
                if response.result == PlaceResultCode.OK:
                    responses.append(response)
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE:
                    not_enough_cpu_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE:
                    not_enough_memory_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY:
                    not_enough_datastore_capacity = True
                elif response.result == \
                        PlaceResultCode.NO_SUCH_RESOURCE:
                    no_such_resource = True
            except Exception as e:
                self._logger.warning(
                    "Caught exception while sending "
                    "place request: %s", str(e))

        best_response = self._scorer.score(responses)

        if best_response is not None:
            return best_response
        elif not_enough_cpu_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE)
        elif not_enough_memory_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE)
        elif not_enough_datastore_capacity:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY)
        elif no_such_resource:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)
        else:
            return PlaceResponse(PlaceResultCode.SYSTEM_ERROR)
Exemple #8
0
    def test_slow_heartbeater(self, client_class, time_class, chairman):
        """Don't report missing if the current sequence number is equal to
           the sequence number of the last successful ping.
        """
        client_class.side_effect = self.create_fake_client
        bar_client = MagicMock()
        self._clients["bar"] = bar_client
        children = {"bar": ServerAddress("bar", 1234)}

        # send a ping. bar should get reported resurrected.
        health_checker = HealthChecker("id", children, self.conf)
        time_class.return_value = 0.0
        chairman.return_value.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        health_checker._send_heartbeat()
        health_checker._send_report()
        req = ReportResurrectedRequest(hosts=['bar'],
                                       schedulers=None,
                                       scheduler_id='id')
        chairman.return_value.report_resurrected.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertEqual(health_checker._resurrected_children, set(["bar"]))
        self.assertEqual(health_checker._missing_children, set())

        # call _send_report() again after 100 seconds. bar shouldn't get
        # reported missing since the heartbeater hasn't sent another ping.
        time_class.return_value = 100.0
        chairman.reset_mock()
        health_checker._send_report()
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertFalse(chairman.return_value.report_resurrected.called)

        # ping fails. now the reporter should report bar missing.
        bar_client.ping.side_effect = Exception()
        chairman.return_value.report_missing.return_value = \
            ReportMissingResponse(result=0)
        health_checker._send_heartbeat()
        health_checker._send_report()
        req = ReportMissingRequest(hosts=['bar'],
                                   schedulers=None,
                                   scheduler_id='id')
        chairman.return_value.report_missing.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_resurrected.called)
Exemple #9
0
    def test_chairman_failure(self, client_class, time_class, chairman):
        """A rejected or failed resurrected report is sent again on the next
        _send_report() until the chairman accepts it.
        """
        client_class.side_effect = self.create_fake_client
        self._clients["bar"] = MagicMock()

        checker = HealthChecker(
            "id", {"bar": ServerAddress("bar", 1234)}, self.conf)
        time_class.return_value = 0.0
        checker._send_heartbeat()

        # The object the code under test gets when it calls chairman().
        remote = chairman.return_value

        def expect_one_resurrected_report():
            remote.report_resurrected.assert_called_once_with(req)
            self.assertFalse(remote.report_missing.called)

        # Attempt 1: the chairman answers with a non-zero result code.
        remote.report_resurrected.return_value = \
            ReportResurrectedResponse(result=1)
        checker._send_report()
        req = ReportResurrectedRequest(
            hosts=['bar'], schedulers=None, scheduler_id='id')
        expect_one_resurrected_report()

        # Attempt 2: the call to the chairman raises.
        chairman.reset_mock()
        remote.report_resurrected.side_effect = Exception()
        checker._send_report()
        expect_one_resurrected_report()

        # Attempt 3: the chairman accepts the report.
        chairman.reset_mock()
        remote.report_resurrected.side_effect = None
        remote.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        checker._send_report()
        expect_one_resurrected_report()

        # Once accepted, nothing is left to report.
        chairman.reset_mock()
        checker._send_report()
        self.assertFalse(remote.report_resurrected.called)
        self.assertFalse(remote.report_missing.called)
Exemple #10
0
    def test_report(self, client_class, time_class, chairman):
        """Test that resurrected and missing hosts get reported correctly"""
        client_class.side_effect = self.create_fake_client
        chairman.return_value.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        bar_client = MagicMock()
        baz_client = MagicMock()
        self._clients["bar"] = bar_client
        self._clients["baz"] = baz_client
        children = {
            "bar": ServerAddress("bar", 1234),
            "baz": ServerAddress("baz", 1234)
        }

        # first ping succeeds for bar and baz. they get reported resurrected.
        health_checker = HealthChecker("id", children, self.conf)
        time_class.return_value = 0.0
        health_checker._send_heartbeat()
        health_checker._send_report()
        # NOTE(review): the expected request lists the hosts as
        # ['bar', 'baz']; presumably this relies on the order in which
        # HealthChecker emits them - confirm it is deterministic.
        req = ReportResurrectedRequest(hosts=['bar', 'baz'],
                                       schedulers=None,
                                       scheduler_id='id')
        chairman.return_value.report_resurrected.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertEqual(health_checker._resurrected_children,
                         set(["bar", "baz"]))
        self.assertEqual(health_checker._missing_children, set())

        # call _send_report again. this time nothing should get reported.
        chairman.reset_mock()
        health_checker._send_report()
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertFalse(chairman.return_value.report_resurrected.called)

        # bar goes missing.
        bar_client.ping.side_effect = Exception()
        health_checker._send_heartbeat()
        time_class.return_value = 100.0
        chairman.return_value.report_missing.return_value = \
            ReportMissingResponse(result=0)
        health_checker._send_report()
        req = ReportMissingRequest(hosts=['bar'],
                                   schedulers=None,
                                   scheduler_id='id')
        chairman.return_value.report_missing.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_resurrected.called)

        # bar comes back
        chairman.reset_mock()
        bar_client.ping.side_effect = None
        time_class.return_value = 200.0
        health_checker._send_heartbeat()
        chairman.return_value.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        health_checker._send_report()
        req = ReportResurrectedRequest(hosts=['bar'],
                                       schedulers=None,
                                       scheduler_id='id')
        self.assertFalse(chairman.return_value.report_missing.called)
        chairman.return_value.report_resurrected.assert_called_once_with(req)
class LeafScheduler(BaseScheduler):
    """Leaf scheduler manages child hosts.

    find() queries every configured child host through the thread pool;
    place() asks a selected subset of hosts and returns the best-scored
    answer. Both refuse to run until configure() has been called.
    """

    def __init__(self, scheduler_id, ut_ratio, enable_health_checker=True):
        """Create a new leaf scheduler.

        :param scheduler_id: scheduler id
        :type scheduler_id: str
        :param ut_ratio: value handed to DefaultScorer
        :param enable_health_checker: enables health checking of children.
        :type enable_health_checker: bool
        """
        self._logger = logging.getLogger(__name__)
        # Let the logging module do the interpolation (lazy %-args).
        self._logger.info("Creating leaf scheduler: %s", scheduler_id)
        self.lock = threading.RLock()
        self._latch = CountUpDownLatch()
        self._place_strategy = RandomSubsetStrategy(PLACE_FAN_OUT_RATIO,
                                                    MIN_PLACE_FAN_OUT,
                                                    MAX_PLACE_FAN_OUT)
        self._scheduler_id = scheduler_id
        self._hosts = []
        self._scorer = DefaultScorer(ut_ratio)
        self._threadpool = None
        self._initialize_services(scheduler_id)
        self._health_checker = None
        self._enable_health_checker = enable_health_checker
        self._configured = ConfigStates.UNINITIALIZED

    def _initialize_services(self, scheduler_id):
        """Look up the shared thread pool and create the scheduler client.

        :param scheduler_id: scheduler id (not used by this implementation)
        """
        self._threadpool = common.services.get(ThreadPoolExecutor)
        self._scheduler_client = SchedulerClient()

    @locked
    def configure(self, hosts):
        """Configure the leaf scheduler with a new set of child hosts.

        Replaces the current host list, hands it to _coalesce_resources and,
        when health checking is enabled, replaces the health checker with one
        created for the new children.

        :param hosts: child hosts in thrift form; every entry is converted
                      with ChildInfo.from_thrift
        :type hosts: list
        """
        # Convert every entry before rebinding self._hosts, so a conversion
        # that raises cannot leave a half-populated host list behind.
        self._hosts = [ChildInfo.from_thrift(host) for host in hosts]

        self._coalesce_resources(self._hosts)

        # A checker that is already present was created for the previous
        # children; stop it before (possibly) creating a new one.
        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # initialize health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id, children,
                                                 agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED

    @locked
    def _get_hosts(self):
        """
        Get the list of hosts for this scheduler.
        The returned list is a shallow copy of the host list (a new list
        holding the same host objects), so a subsequent call to configure()
        does not change the list seen by calls in flight.
        Assumes the host is configured as a leaf scheduler.
        :rtype: list of the objects produced by ChildInfo.from_thrift
        """
        return list(self._hosts)

    def mark_pending(func):
        """
        Decorator for bumping up the pending count for calls that are inflight.

        The latch is counted up before ``func`` runs and counted down in a
        ``finally`` clause, so the count drops again even when ``func``
        raises.
        """
        # NOTE(review): log_level receives the logging.debug function, not
        # the logging.DEBUG constant - confirm this is what log_request
        # expects.
        @log_request(log_level=logging.debug)
        def nested(self, *args, **kwargs):
            self._latch.count_up()
            self._logger.debug(
                "latch counted up to: {0}".format(self._latch.count))
            try:
                return func(self, *args, **kwargs)
            finally:
                self._latch.count_down()
                self._logger.debug(
                    "latch counted down to: {0}".format(self._latch.count))
        return nested

    @mark_pending
    def find(self, request):
        """Find the specified resource.

        Submits one _find_worker call per configured host and waits up to
        FIND_TIMEOUT for the answers. The first completed answer whose result
        is FindResultCode.OK is returned; otherwise NOT_FOUND is returned.

        :type request: FindRequest
        :rtype: FindResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        # Host service only has a single scheduler
        request.scheduler_id = None

        futures = []
        for agent in self._get_hosts():
            future = self._threadpool.submit(
                self._find_worker, agent.address, agent.port,
                agent.id, request)
            futures.append(future)

        done, not_done = concurrent.futures.wait(futures, timeout=FIND_TIMEOUT)
        self._logger.info("Find responses received: %d, timed out: %d",
                          len(done), len(not_done))

        # NOTE(review): unlike place(), an exception raised by a worker
        # propagates out of future.result() here - confirm this is intended.
        for future in done:
            response = future.result()
            if response.result == FindResultCode.OK:
                return response

        return FindResponse(FindResultCode.NOT_FOUND)

    @mark_pending
    def place(self, request):
        """Place the specified resources.

        Returns the response chosen by the scorer among all OK answers. When
        there is none, the failure reported by the hosts is returned with
        this precedence: not enough CPU, not enough memory, not enough
        datastore capacity, no such resource; SYSTEM_ERROR if no host
        reported any of these.

        :type request: PlaceRequest
        :rtype: PlaceResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        request.scheduler_id = None

        constraints = self._collect_constraints(request.resource)
        selected = self._placement_hosts(request, constraints)
        if len(selected) == 0:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)

        selected = self._filter_missing_hosts(selected)
        done = self._execute_placement(selected, request)

        responses = []
        no_such_resource = False
        not_enough_memory_resource = False
        not_enough_cpu_resource = False
        not_enough_datastore_capacity = False

        for future in done:
            # A failing host must not abort the whole placement: log it and
            # keep looking at the remaining answers.
            try:
                response = future.result()
                if response.result == PlaceResultCode.OK:
                    responses.append(response)
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE:
                    not_enough_cpu_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE:
                    not_enough_memory_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY:
                    not_enough_datastore_capacity = True
                elif response.result == \
                        PlaceResultCode.NO_SUCH_RESOURCE:
                    no_such_resource = True
            except Exception as e:
                self._logger.warning(
                    "Caught exception while sending "
                    "place request: %s", str(e))

        best_response = self._scorer.score(responses)

        if best_response is not None:
            return best_response
        elif not_enough_cpu_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE)
        elif not_enough_memory_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE)
        elif not_enough_datastore_capacity:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY)
        elif no_such_resource:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)
        else:
            return PlaceResponse(PlaceResultCode.SYSTEM_ERROR)
    def test_slow_heartbeater(self, client_class, time_class, chairman):
        """Don't report missing if the current sequence number is equal to
           the sequence number of the last successful ping.
        """
        client_class.side_effect = self.create_fake_client
        bar_client = MagicMock()
        self._clients["bar"] = bar_client
        children = {"bar": ServerAddress("bar", 1234)}

        # send a ping. bar should get reported resurrected.
        health_checker = HealthChecker("id", children, self.conf)
        time_class.return_value = 0.0
        chairman.return_value.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        health_checker._send_heartbeat()
        health_checker._send_report()
        req = ReportResurrectedRequest(hosts=['bar'], schedulers=None,
                                       scheduler_id='id')
        chairman.return_value.report_resurrected.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertEqual(health_checker._resurrected_children, set(["bar"]))
        self.assertEqual(health_checker._missing_children, set())

        # call _send_report() again after 100 seconds. bar shouldn't get
        # reported missing since the heartbeater hasn't sent another ping.
        time_class.return_value = 100.0
        chairman.reset_mock()
        health_checker._send_report()
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertFalse(chairman.return_value.report_resurrected.called)

        # ping fails. now the reporter should report bar missing.
        bar_client.ping.side_effect = Exception()
        chairman.return_value.report_missing.return_value = \
            ReportMissingResponse(result=0)
        health_checker._send_heartbeat()
        health_checker._send_report()
        req = ReportMissingRequest(hosts=['bar'], schedulers=None,
                                   scheduler_id='id')
        chairman.return_value.report_missing.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_resurrected.called)
    def test_chairman_failure(self, client_class, time_class, chairman):
        """A rejected or failed resurrected report is sent again on the next
        _send_report() until the chairman accepts it.
        """
        client_class.side_effect = self.create_fake_client
        self._clients["bar"] = MagicMock()

        checker = HealthChecker(
            "id", {"bar": ServerAddress("bar", 1234)}, self.conf)
        time_class.return_value = 0.0
        checker._send_heartbeat()

        # The object the code under test gets when it calls chairman().
        remote = chairman.return_value

        def expect_one_resurrected_report():
            remote.report_resurrected.assert_called_once_with(req)
            self.assertFalse(remote.report_missing.called)

        # Attempt 1: the chairman answers with a non-zero result code.
        remote.report_resurrected.return_value = \
            ReportResurrectedResponse(result=1)
        checker._send_report()
        req = ReportResurrectedRequest(
            hosts=['bar'], schedulers=None, scheduler_id='id')
        expect_one_resurrected_report()

        # Attempt 2: the call to the chairman raises.
        chairman.reset_mock()
        remote.report_resurrected.side_effect = Exception()
        checker._send_report()
        expect_one_resurrected_report()

        # Attempt 3: the chairman accepts the report.
        chairman.reset_mock()
        remote.report_resurrected.side_effect = None
        remote.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        checker._send_report()
        expect_one_resurrected_report()

        # Once accepted, nothing is left to report.
        chairman.reset_mock()
        checker._send_report()
        self.assertFalse(remote.report_resurrected.called)
        self.assertFalse(remote.report_missing.called)
    def test_report(self, client_class, time_class, chairman):
        """Test that resurrected and missing hosts get reported correctly"""
        client_class.side_effect = self.create_fake_client
        chairman.return_value.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        bar_client = MagicMock()
        baz_client = MagicMock()
        self._clients["bar"] = bar_client
        self._clients["baz"] = baz_client
        children = {"bar": ServerAddress("bar", 1234),
                    "baz": ServerAddress("baz", 1234)}

        # first ping succeeds for bar and baz. they get reported resurrected.
        health_checker = HealthChecker("id", children, self.conf)
        time_class.return_value = 0.0
        health_checker._send_heartbeat()
        health_checker._send_report()
        # NOTE(review): the expected request lists the hosts as
        # ['bar', 'baz']; presumably this relies on the order in which
        # HealthChecker emits them - confirm it is deterministic.
        req = ReportResurrectedRequest(hosts=['bar', 'baz'], schedulers=None,
                                       scheduler_id='id')
        chairman.return_value.report_resurrected.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertEqual(health_checker._resurrected_children,
                         set(["bar", "baz"]))
        self.assertEqual(health_checker._missing_children, set())

        # call _send_report again. this time nothing should get reported.
        chairman.reset_mock()
        health_checker._send_report()
        self.assertFalse(chairman.return_value.report_missing.called)
        self.assertFalse(chairman.return_value.report_resurrected.called)

        # bar goes missing.
        bar_client.ping.side_effect = Exception()
        health_checker._send_heartbeat()
        time_class.return_value = 100.0
        chairman.return_value.report_missing.return_value = \
            ReportMissingResponse(result=0)
        health_checker._send_report()
        req = ReportMissingRequest(hosts=['bar'], schedulers=None,
                                   scheduler_id='id')
        chairman.return_value.report_missing.assert_called_once_with(req)
        self.assertFalse(chairman.return_value.report_resurrected.called)

        # bar comes back
        chairman.reset_mock()
        bar_client.ping.side_effect = None
        time_class.return_value = 200.0
        health_checker._send_heartbeat()
        chairman.return_value.report_resurrected.return_value = \
            ReportResurrectedResponse(result=0)
        health_checker._send_report()
        req = ReportResurrectedRequest(hosts=['bar'], schedulers=None,
                                       scheduler_id='id')
        self.assertFalse(chairman.return_value.report_missing.called)
        chairman.return_value.report_resurrected.assert_called_once_with(req)