Example #1
0
 def test_stop(self):
     """Make sure start() starts threads and stop() stops them"""
     checker = HealthChecker("id", {}, self.conf)
     workers = (checker._heartbeater, checker._reporter)
     # Neither worker thread runs before start() is called.
     for worker in workers:
         self.assertFalse(worker.is_alive())
     checker.start()
     # start() brings both the heartbeater and the reporter up.
     for worker in workers:
         self.assertTrue(worker.is_alive())
     checker.stop()
     # stop() takes both workers back down.
     for worker in workers:
         self.assertFalse(worker.is_alive())
 def test_stop(self):
     """Make sure start() starts threads and stop() stops them"""
     # NOTE(review): this duplicates an earlier test_stop definition; if both
     # live in the same class, this second one silently shadows the first.
     health_checker = HealthChecker("id", {}, self.conf)
     # Before start(), neither worker thread may be running.
     self.assertFalse(health_checker._heartbeater.is_alive())
     self.assertFalse(health_checker._reporter.is_alive())
     health_checker.start()
     # start() is expected to spawn both the heartbeater and reporter threads.
     self.assertTrue(health_checker._heartbeater.is_alive())
     self.assertTrue(health_checker._reporter.is_alive())
     health_checker.stop()
     # After stop(), both threads must no longer be alive.
     self.assertFalse(health_checker._heartbeater.is_alive())
     self.assertFalse(health_checker._reporter.is_alive())
Example #3
0
class LeafScheduler(BaseScheduler):
    """Leaf scheduler manages child hosts.

    Fans find/place requests out to the configured child hosts through a
    thread pool and aggregates their responses.
    """

    def __init__(self, scheduler_id, ut_ratio, enable_health_checker=True):
        """Create a new leaf scheduler.

        :param scheduler_id: scheduler id
        :type scheduler_id: str
        :param ut_ratio: utilization ratio handed to the placement scorer
        :param enable_health_checker: enables health checking of children.
        :type enable_health_checker: bool
        """
        self._logger = logging.getLogger(__name__)
        # Lazy %-style args: the message is only formatted if INFO is enabled.
        self._logger.info("Creating leaf scheduler: %s", scheduler_id)
        self.lock = threading.RLock()
        # Tracks in-flight requests (see mark_pending below).
        self._latch = CountUpDownLatch()
        self._place_strategy = RandomSubsetStrategy(PLACE_FAN_OUT_RATIO,
                                                    MIN_PLACE_FAN_OUT,
                                                    MAX_PLACE_FAN_OUT)
        self._scheduler_id = scheduler_id
        self._hosts = []
        self._scorer = DefaultScorer(ut_ratio)
        self._threadpool = None
        self._initialize_services(scheduler_id)
        self._health_checker = None
        self._enable_health_checker = enable_health_checker
        # find()/place() reject requests until configure() has run.
        self._configured = ConfigStates.UNINITIALIZED

    def _initialize_services(self, scheduler_id):
        # Resolve shared services once at construction time.
        self._threadpool = common.services.get(ThreadPoolExecutor)
        self._scheduler_client = SchedulerClient()

    @locked
    def configure(self, hosts):
        """Configure the leaf scheduler.

        Replaces the current set of child hosts and, when enabled, restarts
        the health checker over the new children.

        :param hosts: list of child hosts
        :type hosts: list of str
        """
        self._hosts = [ChildInfo.from_thrift(host) for host in hosts]

        self._coalesce_resources(self._hosts)

        # Stop any checker still watching the previous set of children
        # before creating one for the new set.
        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # Initialize health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id, children,
                                                 agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED

    @locked
    def _get_hosts(self):
        """Return a snapshot of this scheduler's child hosts.

        The returned list is a shallow copy of the current host list, so a
        concurrent configure() call cannot mutate a list that in-flight
        requests are iterating.  (The original docstring claimed a deep
        copy; list() only copies the list itself, not the ChildInfo
        elements.)
        Assumes the host is configured as a leaf scheduler.

        :rtype: list of ChildInfo
        """
        return list(self._hosts)

    def mark_pending(func):
        """Decorator bumping the pending count for calls that are in flight.

        The latch is counted up before the wrapped call and counted down in
        a finally block so the count stays balanced even if the call raises.
        """
        # TODO(review): consider functools.wraps(func) here so the wrapper
        # keeps the wrapped function's name for logging/introspection.
        @log_request(log_level=logging.debug)
        def nested(self, *args, **kwargs):
            self._latch.count_up()
            self._logger.debug("latch counted up to: {0}".format(
                self._latch.count))
            try:
                return func(self, *args, **kwargs)
            finally:
                self._latch.count_down()
                self._logger.debug("latch counted down to: {0}".format(
                    self._latch.count))

        return nested

    @mark_pending
    def find(self, request):
        """Find the specified resource.

        Fans the request out to every child host and returns the first OK
        response; waits at most FIND_TIMEOUT for responses.

        :type request: FindRequest
        :rtype: FindResponse
        :raise: InvalidScheduler if configure() has not run yet
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        # Host service only has a single scheduler
        request.scheduler_id = None

        futures = [self._threadpool.submit(self._find_worker, agent.address,
                                           agent.port, agent.id, request)
                   for agent in self._get_hosts()]

        done, not_done = concurrent.futures.wait(futures, timeout=FIND_TIMEOUT)
        self._logger.info("Find responses received: %d, timed out: %d",
                          len(done), len(not_done))

        for future in done:
            response = future.result()
            if response.result == FindResultCode.OK:
                return response

        return FindResponse(FindResultCode.NOT_FOUND)

    @mark_pending
    def place(self, request):
        """Place the specified resources.

        Fans the request out to a subset of child hosts, scores the OK
        responses, and returns the best one; otherwise returns the most
        specific error code that was observed.

        :type request: PlaceRequest
        :rtype: PlaceResponse
        :raise: InvalidScheduler if configure() has not run yet
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        # Host service only has a single scheduler.
        request.scheduler_id = None

        constraints = self._collect_constraints(request.resource)
        selected = self._placement_hosts(request, constraints)
        if not selected:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)

        selected = self._filter_missing_hosts(selected)
        done = self._execute_placement(selected, request)

        responses = []
        # Remember which error codes were seen so the most specific one can
        # be reported when no host succeeds.
        no_such_resource = False
        not_enough_memory_resource = False
        not_enough_cpu_resource = False
        not_enough_datastore_capacity = False

        for future in done:
            try:
                response = future.result()
                if response.result == PlaceResultCode.OK:
                    responses.append(response)
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE:
                    not_enough_cpu_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE:
                    not_enough_memory_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY:
                    not_enough_datastore_capacity = True
                elif response.result == \
                        PlaceResultCode.NO_SUCH_RESOURCE:
                    no_such_resource = True
            # Fixed: "except Exception, e" is Python-2-only syntax; the
            # "as" form (PEP 3110) works on Python 2.6+ and Python 3.
            except Exception as e:
                self._logger.warning(
                    "Caught exception while sending "
                    "place request: %s", str(e))

        best_response = self._scorer.score(responses)

        if best_response is not None:
            return best_response
        elif not_enough_cpu_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE)
        elif not_enough_memory_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE)
        elif not_enough_datastore_capacity:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY)
        elif no_such_resource:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)
        else:
            return PlaceResponse(PlaceResultCode.SYSTEM_ERROR)
class LeafScheduler(BaseScheduler):
    """Leaf scheduler manages child hosts.

    Distributes find/place requests across the configured children via a
    thread pool and aggregates the results.
    """

    def __init__(self, scheduler_id, ut_ratio, enable_health_checker=True):
        """Create a new leaf scheduler.

        :param scheduler_id: scheduler id
        :type scheduler_id: str
        :param ut_ratio: utilization ratio passed to the placement scorer
        :param enable_health_checker: enables health checking of children.
        :type enable_health_checker: bool
        """
        self._logger = logging.getLogger(__name__)
        # Use the logger's lazy %-formatting instead of eager "%" interpolation.
        self._logger.info("Creating leaf scheduler: %s", scheduler_id)
        self.lock = threading.RLock()
        # Counts in-flight requests; bumped by the mark_pending decorator.
        self._latch = CountUpDownLatch()
        self._place_strategy = RandomSubsetStrategy(PLACE_FAN_OUT_RATIO,
                                                    MIN_PLACE_FAN_OUT,
                                                    MAX_PLACE_FAN_OUT)
        self._scheduler_id = scheduler_id
        self._hosts = []
        self._scorer = DefaultScorer(ut_ratio)
        self._threadpool = None
        self._initialize_services(scheduler_id)
        self._health_checker = None
        self._enable_health_checker = enable_health_checker
        # Stays UNINITIALIZED (requests rejected) until configure() succeeds.
        self._configured = ConfigStates.UNINITIALIZED

    def _initialize_services(self, scheduler_id):
        # Look up shared services once so the hot paths don't have to.
        self._threadpool = common.services.get(ThreadPoolExecutor)
        self._scheduler_client = SchedulerClient()

    @locked
    def configure(self, hosts):
        """Configure the leaf scheduler.

        Swaps in a new child-host list and (re)starts health checking for
        the new children when enabled.

        :param hosts: list of child hosts
        :type hosts: list of str
        """
        self._hosts = [ChildInfo.from_thrift(host) for host in hosts]

        self._coalesce_resources(self._hosts)

        # A checker for the previous children must be stopped before a new
        # one is started, otherwise its threads keep running.
        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # Initialize health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id, children,
                                                 agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED

    @locked
    def _get_hosts(self):
        """Return a snapshot of the child-host list.

        list() makes a shallow copy (not a deep copy as previously
        documented): the list object is new, so a concurrent configure()
        cannot swap entries out from under an in-flight request, but the
        ChildInfo elements themselves are shared.
        Assumes the host is configured as a leaf scheduler.

        :rtype: list of ChildInfo
        """
        return list(self._hosts)

    def mark_pending(func):
        """Decorator for bumping up the pending count for in-flight calls.

        count_down happens in a finally block, so the latch is balanced
        even when the wrapped call raises.
        """
        # TODO(review): functools.wraps(func) would preserve the wrapped
        # function's metadata for logging and introspection.
        @log_request(log_level=logging.debug)
        def nested(self, *args, **kwargs):
            self._latch.count_up()
            self._logger.debug(
                "latch counted up to: {0}".format(self._latch.count))
            try:
                return func(self, *args, **kwargs)
            finally:
                self._latch.count_down()
                self._logger.debug(
                    "latch counted down to: {0}".format(self._latch.count))
        return nested

    @mark_pending
    def find(self, request):
        """Find the specified resource.

        Queries every child host and returns the first OK response,
        waiting at most FIND_TIMEOUT for the children to answer.

        :type request: FindRequest
        :rtype: FindResponse
        :raise: InvalidScheduler if the scheduler is not yet configured
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        # Host service only has a single scheduler
        request.scheduler_id = None

        futures = [self._threadpool.submit(
                       self._find_worker, agent.address, agent.port,
                       agent.id, request)
                   for agent in self._get_hosts()]

        done, not_done = concurrent.futures.wait(futures, timeout=FIND_TIMEOUT)
        self._logger.info("Find responses received: %d, timed out: %d",
                          len(done), len(not_done))

        for future in done:
            response = future.result()
            if response.result == FindResultCode.OK:
                return response

        return FindResponse(FindResultCode.NOT_FOUND)

    @mark_pending
    def place(self, request):
        """Place the specified resources.

        Sends the request to a fan-out subset of children, scores the OK
        responses, and returns the best; when none succeed, returns the
        most specific error code seen.

        :type request: PlaceRequest
        :rtype: PlaceResponse
        :raise: InvalidScheduler if the scheduler is not yet configured
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        request.scheduler_id = None

        constraints = self._collect_constraints(request.resource)
        selected = self._placement_hosts(request, constraints)
        if not selected:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)

        selected = self._filter_missing_hosts(selected)
        done = self._execute_placement(selected, request)

        responses = []
        # Flags recording which failure modes occurred, used below to pick
        # the most specific error response.
        no_such_resource = False
        not_enough_memory_resource = False
        not_enough_cpu_resource = False
        not_enough_datastore_capacity = False

        for future in done:
            try:
                response = future.result()
                if response.result == PlaceResultCode.OK:
                    responses.append(response)
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE:
                    not_enough_cpu_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE:
                    not_enough_memory_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY:
                    not_enough_datastore_capacity = True
                elif response.result == \
                        PlaceResultCode.NO_SUCH_RESOURCE:
                    no_such_resource = True
            # Fixed: the comma form ("except Exception, e") is invalid in
            # Python 3; PEP 3110's "as" form works on 2.6+ and 3.
            except Exception as e:
                self._logger.warning(
                    "Caught exception while sending "
                    "place request: %s", str(e))

        best_response = self._scorer.score(responses)

        if best_response is not None:
            return best_response
        elif not_enough_cpu_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE)
        elif not_enough_memory_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE)
        elif not_enough_datastore_capacity:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY)
        elif no_such_resource:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)
        else:
            return PlaceResponse(PlaceResultCode.SYSTEM_ERROR)