Example #1
    def __init__(self, job, job_name, server_level, downstream_job=None):
        """Set up a server task and register it with the scheduler.

        Args:
            job: Stored on the instance as ``self.job``.
            job_name: Prefix of the generated server id; also the key of
                the per-job counter in ``Server.num_servers``.
            server_level: Level of this server. Level 0 must not have a
                downstream job; every other level must have one.
            downstream_job: Stored as ``self.downstream_job``.

        Raises:
            AssertionError: If ``downstream_job`` is set for level 0 or
                missing for any other level.
        """
        # The two checks are alternatives: exactly one applies per level.
        if server_level == 0:
            assert downstream_job is None
        else:
            assert downstream_job is not None

        self.job = job
        self.downstream_job = downstream_job
        self.master = None
        self.server_level = server_level

        # Server ids look like '<job_name>:<n>', n counting up from 1
        # separately for every job name.
        Server.num_servers.setdefault(job_name, 0)
        Server.num_servers[job_name] += 1
        self.server_id = '%s:%d' % (job_name, Server.num_servers[job_name])

        self.state = ServerStateWrapper(self)

        # Kick off the pseudo-thread to do discovery and get resource
        # capacity.
        scheduler.add_thread(self, 0)
Example #2
  def __init__(self, job, job_name, server_level, downstream_job=None):
    """Set up a server task and register it with the scheduler.

    Args:
      job: Stored on the instance as ``self.job``.
      job_name: Prefix of the generated server id; also the key of the
        per-job counter in ``Server.num_servers``.
      server_level: Level of this server. Level 0 must not have a
        downstream job; every other level must have one.
      downstream_job: Stored as ``self.downstream_job``.

    Raises:
      AssertionError: If ``downstream_job`` is set for level 0 or missing
        for any other level.
    """
    # The two checks are alternatives: exactly one applies per level.
    if server_level == 0:
      assert downstream_job is None
    else:
      assert downstream_job is not None

    self.job = job
    self.downstream_job = downstream_job
    self.master = None
    self.server_level = server_level

    # Server ids look like '<job_name>:<n>', n counting up from 1
    # separately for every job name.
    Server.num_servers.setdefault(job_name, 0)
    Server.num_servers[job_name] += 1
    self.server_id = '%s:%d' % (job_name, Server.num_servers[job_name])

    self.state = ServerStateWrapper(self)

    # Kick off the pseudo-thread to do discovery and get resource
    # capacity.
    scheduler.add_thread(self, 0)
Example #3
class Server(object):
    # Used to generate server identifiers.
    num_servers = dict()

    # Constructor.
    def __init__(self, job, job_name, server_level, downstream_job=None):
        if server_level == 0:
            assert downstream_job is None
            assert downstream_job is not None

        self.job = job
        self.downstream_job = downstream_job
        self.master = None
        self.server_level = server_level

        Server.num_servers.setdefault(job_name, 0)
        Server.num_servers[job_name] += 1
        self.server_id = '%s:%d' % (job_name, Server.num_servers[job_name])

        self.state = ServerStateWrapper(self)

        # Kick off the pseudo-thread to do discovery and get resource
        # capacity.
        scheduler.add_thread(self, 0)

    def get_server_id(self):
        """Return the '<job_name>:<n>' identifier assigned in __init__."""
        return self.server_id

    def get_server_level(self):
        """Return the server level passed to the constructor."""
        return self.server_level

    def is_master(self):
        """Return True when the state records an election victory time."""
        # Identity comparison: any recorded time (including 0) counts, and
        # the result cannot be changed by a custom __ne__ on the value.
        return self.state.get_election_victory_time() is not None

    # Tells this server that it is no longer the master. This will reset
    # the internal state.
    def lose_mastership(self):
        """Log that this server, currently the master, loses mastership.

        NOTE(review): the comment above this method says the internal
        state is reset, but no reset is visible in this body -- confirm
        against the original source.
        """
        assert self.is_master()

        notice = '%s losing mastership' % self.server_id
        logger.info(notice)

    # Tells this server that it has become the master (as result of a
    # master election having been triggered).
    def become_master(self):
        """Log the promotion to master and wake up the pseudo-thread.

        Asserts that this server is not the master yet.
        """
        assert not self.is_master()

        notice = '%s becoming master' % self.server_id
        logger.info(notice)

        # Reschedule the discovery/capacity thread with a delay of 0.
        scheduler.update_thread(self, 0)

    # Returns the reporting data for this server. Just delegates to the
    # wrapped state object.
    def get_reporting_data(self, resource_id):
        """Return the wrapped state's reporting data for ``resource_id``.

        Asserts that this server is currently the master.
        """
        assert self.is_master()

        reporting_data = self.state.get_reporting_data(resource_id)
        return reporting_data

    # Sends a Discovery RPC to a random task in the server job.
    # This differs from the _discover method in the client code that
    # here we are not interested in the safe capacities. Returns a
    # reference to the server tasks that is the master, or None
    # if we did not find one.
    def _discover(self):
        """Ask a random task of the downstream job who the master is.

        Only valid for servers above level 0 (asserted).

        Returns:
            Whatever ``downstream_job.get_task_by_name`` returns for the
            response's ``master_bns`` field, or None when the response
            has no such field. The same value is kept in ``self.master``.
        """
        assert self.server_level > 0

        request = DiscoveryRequest()
        request.client_id = self.server_id

        # Sends the request to a random task in the server job.
        response = self.downstream_job.get_random_task().Discovery_RPC(request)

        # Remember the master named in the response; without a master_bns
        # field there is no known master, so forget any previous one.
        if response.HasField('master_bns'):
            self.master = self.downstream_job.get_task_by_name(
                response.master_bns)
        else:
            self.master = None
            logger.warning('%s doesn\'t know who the master is.' %
                           self.server_id)

        return self.master

    # Implements the Discovery RPC.
    def Discovery_RPC(self, request):
        """Handle a Discovery RPC and return a DiscoveryResponse.

        The response names the current master of ``self.job`` (when there
        is one) and lists the safe capacity of every requested resource
        whose template has a ``safe_capacity`` field.
        """
        assert request.IsInitialized()

        # NOTE(review): timer is never used after this line in the visible
        # code; the statements that use it appear to be missing -- confirm.
        timer = Gauge.get('server.DiscoveryRPC.latency')
        logger.info('%s handling Discovery RPC from %s' %
                    (self.server_id, request.client_id))
        response = DiscoveryResponse()

        # Sets the master_bns field in the response if there is a current
        # master.
        master = self.job.get_master()

        if master:
            response.master_bns = master.get_server_id()
            # We don't know who the master is.
            # NOTE(review): the comment above looks like it belonged to a
            # separate no-master branch that is not present here.

        # Goes through the resource ids in the request and sets the
        # safe capacity for every resource that has a safe capacity
        # configured.
        for r in request.resource_id:
            t = global_config.find_resource_template(r)

            if t and t.HasField('safe_capacity'):
                safe = response.safe_capacity.add()
                safe.resource_id = r
                safe.safe_capacity = t.safe_capacity

        assert response.IsInitialized()


        return response

    # Figured out when to execute the next _get_capacity call. The interval is determined by the
    # refresh_interval settings of the resources in the state.
    def _renew_capacity_interval(self):
        # Figures out the smallest refresh_interval in the server state.
        delay = sys.maxint

        for resource in self.state.all_resources():
            if resource.HasField('has'):
                delay = min(delay, resource.has.refresh_interval)

        # If that delay is highly improbable we have some error and we use
        # a default delay. This might for instance happen if all resources
        # have lost their (or never gotten any) leases.
        if delay <= 0 or delay == sys.maxint:
            logger.error('%s improbable delay %d, set to %d instead' %
                         (self.server_id, delay, _kDefaultRefreshInterval))
            delay = _kDefaultRefreshInterval

        return delay

    # Get some capacity from the master downstream server.
    def _get_capacity_downstream(self):
        # Calls GetServerCapacity_RPC on self.master; returns False when
        # the response is falsy and True otherwise.
        # NOTE(review): the call below is cut off after its opening
        # parenthesis in this copy; its argument(s) are missing and must
        # be restored from the original source before this can run.
        response = self.master.GetServerCapacity_RPC(

        # Did the RPC fail?
        if not response:
            return False

        # Work the response into the state.
        # NOTE(review): no statement follows the comment above in this
        # copy; the code that updates the state appears to be missing.

        return True

    # Get some capacity for this server to hand out. Returns a boolean to
    # indicate whether this succeeded or failed.
    def _get_capacity(self):
        # Obtains capacity for this server (which must be the master) and
        # returns the success flag.
        # NOTE(review): several statements in this method are truncated in
        # this copy (marked below); it does not parse as shown.
        assert self.is_master()

        now = clock.get_time()

        # Assume the worst... :-)
        success = False

        # If we are server level 0, we need to get the capacity from the
        # configuration.
        if self.server_level == 0:
            for resource in self.state.all_resources():
                # NOTE(review): the create() call is cut off and the next
                # line is the tail of a different statement -- one or more
                # lines are missing between them.
                algo = AlgorithmImpl.create(resource.template,
                    algo.create_lease(resource, resource.template.capacity))

                # Note, we set a refresh interval here even though the capacity we get from the
                # configuration lasts forever. However by setting a refresh interval and relatively
                # short leases we ensure that configuration changes (e.g. from CDD) are
                # picked up.
                resource.has.refresh_interval *= 2

            success = True
            # If this is not the root server it gets its capacity from
            # a downstream server.
            # NOTE(review): as shown, the assignment below also runs for
            # level 0 and overwrites success; the comment above suggests
            # it belonged to a separate non-root branch -- confirm.
            success = self._get_capacity_downstream()

        # NOTE(review): the format argument of this call is missing.
        logger.info('%s resource state after getting capacity:' %

        for resource in self.state.all_resources():
            # NOTE(review): the argument tuple below is cut off before its
            # last element and closing parentheses.
            logger.info('resource: %s got: %lf lease: %d refresh: %d' %
                        (resource.resource_id, resource.has.capacity,
                         resource.has.expiry_time - now,

        return success

    # Implements the GetServerCapacity RPC.
    def GetServerCapacity_RPC(self, request):
        # Handles a GetServerCapacity request. Returns None when this
        # server is not the master; otherwise returns a
        # GetServerCapacityResponse built from request.resource.
        # NOTE(review): many statements in this method are truncated or
        # missing in this copy (logger calls without their opening line,
        # tuple assignments cut in half, empty loop/if bodies). The
        # comments below only describe what is still visible.
        assert request.IsInitialized()
        assert self.state.is_initialized()

        # Only the master can handle this RPC.
        if not self.is_master():
                # NOTE(review): the call that this message string belonged
                # to is missing, as is the string's format argument.
                '%s getting a GetServerCapacity request when not master' %


            return None

        # NOTE(review): gauge is not used again in the visible code.
        gauge = Gauge.get('server.GetServerCapacity_RPC.latency')
        now = clock.get_time()

        # Cleans the state. This removes resources and clients with expired
        # leases and such.
        # NOTE(review): no cleanup statement follows in this copy.

        # A set of resources that we need to skip in step 2 (the actual
        # handing out of capacity).
        resources_to_skip = set()

        # First step: Go through the request and update the state with the
        # information from the request.
        for req in request.resource:
             # NOTE(review): start and end of this assignment are missing.
             sr) = (self.state.find_server_resource(request.server_id,

            # If this resource does not exist we don't need to do anything right now.
            if resource:
                assert sr

                # Checks whether the last request from this server was at least
                # _kMinimumInterval seconds ago.
                # NOTE(review): the HasField argument and the statement
                # that starts the message below are missing.
                if sr.HasField(
                ) and now - sr.last_request_time < _kMinimumInterval:
                        '%s GetServerCapacity request for resource %s within the %d '
                        'second threshold' %
                        (self.server_id, req.resource_id, _kMinimumInterval))
                    # Updates the state with the information in the request.
                    sr.last_request_time = now
                    sr.outstanding = req.outstanding
                    del sr.wants[:]

                    # NOTE(review): the bodies of the loop and the if
                    # below are missing.
                    for w in req.wants:

                    if req.HasField('has'):

        # Creates a new response object in which we will insert the response for
        # the resources contained in the request.
        response = GetServerCapacityResponse()

        # Step 2: Loop through all the individual resource requests in the request
        # and hand out capacity.
        for req in request.resource:
            # If this is a resource we need to skip, let's skip it.
            # NOTE(review): the body of this if is missing.
            if req.resource_id in resources_to_skip:

            # Finds the resource and the client state for this resource.
             sr) = (self.state.find_server_resource(request.server_id,

            # Adds a response proto to the overall response.
            resp = response.resource.add()
            resp.resource_id = req.resource_id

            # If this is an unknown resource just give the client whatever it
            # is asking for.
            if not resource:
                assert not sr

                    '%s GetServerCapacity request for unmanaged resource %s' %
                    (self.server_id, req.resource_id))
                resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
                resp.gets.capacity = req.wants
                # NOTE(review): the lines from here on read like the
                # known-resource branch; the line separating the two
                # branches is not present in this copy.
                # Finds the algorithm implementation object for this resource.
                algo = AlgorithmImpl.create(resource.template,

                # If the resource is in learning mode we just return whatever the client
                # has now and create a default lease.
                if resource.learning_mode_expiry_time >= now:
                    if sr.HasField('has'):
                        has_now = sr.has.capacity
                        has_now = 0

                    sr.has.CopyFrom(algo.create_lease(resource, has_now))
                    # Otherwise we just run the algorithm. This will update the
                    # client state object.
                    algo.run_server(resource, sr)

                # Copies the output from the algorithm run into the response.
                # NOTE(review): no copy statement follows in this copy.

            assert resp.IsInitialized()
                '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d'
                % (self.server_id, request.server_id, req.resource_id,
                   sum([w.wants for w in req.wants]), resp.gets.capacity,
                   resp.gets.expiry_time - now, resp.gets.refresh_interval))

        assert response.IsInitialized()


        return response

    # Implements the GetCapacity RPC.
    def GetCapacity_RPC(self, request):
        # Handles a GetCapacity request from a client. Returns None when
        # this server is not the master; otherwise returns a
        # GetCapacityResponse built from request.resource.
        # NOTE(review): many statements in this method are truncated or
        # missing in this copy (logger calls without their opening line,
        # tuple assignments cut in half, empty if bodies). The comments
        # below only describe what is still visible.
        assert request.IsInitialized()
        assert self.state.is_initialized()

        # If this server is not the master it cannot handle this request.
        # The client should do a new Discovery.
        if not self.is_master():
            # NOTE(review): the format argument of this call is missing.
            logger.info('%s getting a GetCapacity request when not master' %

            return None

        # NOTE(review): timer is not used again in the visible code.
        timer = Gauge.get('server.GetCapacity_RPC.latency')
        now = clock.get_time()

        # Cleanup the state. This removes resources and clients with expired
        # leases and such.
        # NOTE(review): no cleanup statement follows in this copy.

        # A set of resources that we need to skip in step 2 (the actual
        # handing out of capacity).
        resources_to_skip = set()

        # First step: Go through the request and update the state with the
        # information from the request.
        for req in request.resource:
            # Finds the resource and the client state for this resource.
             # NOTE(review): start and end of this assignment are missing.
             cr) = self.state.find_client_resource(request.client_id,

            # If this resource does not exist we don't need to do anything
            # right now.
            if resource:
                assert cr

                # Checks whether the last request from this client was at least
                # _kMinimumInterval seconds ago.
                # NOTE(review): the HasField argument and the statement
                # that starts the message below are missing.
                if cr.HasField(
                ) and now - cr.last_request_time < _kMinimumInterval:
                        '%s GetCapacity request for resource %s within the %d second '
                        'threshold' %
                        (self.server_id, req.resource_id, _kMinimumInterval))
                    # Updates the state with the information in the request.
                    cr.last_request_time = now
                    cr.priority = req.priority
                    cr.wants = req.wants

                    # NOTE(review): the body of this if is missing.
                    if req.HasField('has'):

        # Creates a new response object in which we will insert the responses for
        # the resources contained in the request.
        response = GetCapacityResponse()

        # Step 2: Loop through all the individual resource requests in the request
        # and hand out capacity.
        for req in request.resource:
            # If this is a resource we need to skip, let's skip it.
            # NOTE(review): the body of this if is missing.
            if req.resource_id in resources_to_skip:

            # Finds the resource and the client state for this resource.
             cr) = (self.state.find_client_resource(request.client_id,

            # Adds a response proto to the overall response.
            resp = response.response.add()
            resp.resource_id = req.resource_id

            # If this is an unknown resource just give the client whatever it
            # is asking for.
            if not resource:
                assert not cr

                    '%s GetCapacity request for unmanaged resource %s' %
                    (self.server_id, req.resource_id))
                resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
                resp.gets.capacity = req.wants
                # NOTE(review): the lines from here on read like the
                # known-resource branch; the line separating the two
                # branches is not present in this copy.
                # Sets the safe capacity in the response if there is one
                # configured for this resource.
                if resource.template.HasField('safe_capacity'):
                    resp.safe_capacity = resource.template.safe_capacity

                # Finds the algorithm implementation object for this resource.
                algo = AlgorithmImpl.create(resource.template,

                # If the resource is in learning mode we just return whatever the client
                # has now and create a default lease.
                if resource.learning_mode_expiry_time >= now:
                    if cr.HasField('has'):
                        has_now = cr.has.capacity
                        has_now = 0

                    cr.has.CopyFrom(algo.create_lease(resource, has_now))
                    # Otherwise we just run the algorithm. This will update the
                    # client state object.
                    algo.run_client(resource, cr)

                # Copies the output from the algorithm run into the response.
                # NOTE(review): no copy statement follows in this copy.

            assert resp.IsInitialized()
                '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d'
                % (self.server_id, request.client_id, req.resource_id,
                   req.wants, resp.gets.capacity, resp.gets.expiry_time - now,

        assert response.IsInitialized()


        return response

    # This is the main function of the pseudo-thread. It needs to
    # figure out what needs to be done, then do it, and return
    # the timestamp when the next action needs to be scheduled.
    def thread_continue(self):
        """Run one step of the pseudo-thread and return the next delay.

        Returns:
            ``_kTheEndOfTime`` when this server is not the master,
            ``_kDefaultDiscoveryInterval`` when a needed discovery found
            no master, 0 when getting capacity failed, and otherwise the
            result of ``_renew_capacity_interval()``.
        """
        # Non-masters have nothing to do; the interval is updated when
        # this server becomes the master.
        if not self.is_master():
            return _kTheEndOfTime

        # Only non-root servers that do not know the downstream master
        # need a discovery. If it fails, try again in the near future.
        needs_discovery = self.server_level > 0 and not self.master
        if needs_discovery and not self._discover():
            return _kDefaultDiscoveryInterval

        # Either we know the master or we are the root server: get some
        # capacity and report when the leases need refreshing.
        if self._get_capacity():
            return self._renew_capacity_interval()

        # Getting capacity failed: forget the master so that a new
        # discovery is done, and run again right away.
        self.master = None
        return 0
Example #4
class Server(object):
  # Used to generate server identifiers.
  num_servers = dict()

  # Constructor.
  def __init__(self, job, job_name, server_level, downstream_job=None):
    if server_level == 0:
      assert downstream_job is None
      assert downstream_job is not None

    self.job = job
    self.downstream_job = downstream_job
    self.master = None
    self.server_level = server_level

    Server.num_servers.setdefault(job_name, 0)
    Server.num_servers[job_name] += 1
    self.server_id = '%s:%d' % (job_name, Server.num_servers[job_name])

    self.state = ServerStateWrapper(self)

    # Kick off the pseudo-thread to do discovery and get resource
    # capacity.
    scheduler.add_thread(self, 0)

  def get_server_id(self):
    """Return the '<job_name>:<n>' identifier assigned in __init__."""
    return self.server_id

  def get_server_level(self):
    """Return the server level passed to the constructor."""
    return self.server_level

  def is_master(self):
    """Return True when the state records an election victory time."""
    # Identity comparison: any recorded time (including 0) counts, and
    # the result cannot be changed by a custom __ne__ on the value.
    return self.state.get_election_victory_time() is not None

  # Tells this server that it is no longer the master. This will reset
  # the internal state.
  def lose_mastership(self):
    """Log that this server, currently the master, loses mastership.

    NOTE(review): the comment above this method says the internal state
    is reset, but no reset is visible in this body -- confirm against the
    original source.
    """
    assert self.is_master()

    notice = '%s losing mastership' % self.server_id
    logger.info(notice)

  # Tells this server that it has become the master (as result of a
  # master election having been triggered).
  def become_master(self):
    """Log the promotion to master and wake up the pseudo-thread.

    Asserts that this server is not the master yet.
    """
    assert not self.is_master()

    notice = '%s becoming master' % self.server_id
    logger.info(notice)

    # Reschedule the discovery/capacity thread with a delay of 0.
    scheduler.update_thread(self, 0)

  # Returns the reporting data for this server. Just delegates to the
  # wrapped state object.
  def get_reporting_data(self, resource_id):
    """Return the wrapped state's reporting data for ``resource_id``.

    Asserts that this server is currently the master.
    """
    assert self.is_master()

    reporting_data = self.state.get_reporting_data(resource_id)
    return reporting_data

  # Sends a Discovery RPC to a random task in the server job.
  # This differs from the _discover method in the client code that
  # here we are not interested in the safe capacities. Returns a
  # reference to the server tasks that is the master, or None
  # if we did not find one.
  def _discover(self):
    """Ask a random task of the downstream job who the master is.

    Only valid for servers above level 0 (asserted).

    Returns:
      Whatever ``downstream_job.get_task_by_name`` returns for the
      response's ``master_bns`` field, or None when the response has no
      such field. The same value is kept in ``self.master``.
    """
    assert self.server_level > 0

    request = DiscoveryRequest()
    request.client_id = self.server_id

    # Sends the request to a random task in the server job.
    response = self.downstream_job.get_random_task().Discovery_RPC(request)

    # Remember the master named in the response; without a master_bns
    # field there is no known master, so forget any previous one.
    if response.HasField('master_bns'):
      self.master = self.downstream_job.get_task_by_name(response.master_bns)
    else:
      self.master = None
      logger.warning('%s doesn\'t know who the master is.' % self.server_id)

    return self.master

  # Implements the Discovery RPC.
  def Discovery_RPC(self, request):
    """Handle a Discovery RPC and return a DiscoveryResponse.

    The response names the current master of ``self.job`` (when there is
    one) and lists the safe capacity of every requested resource whose
    template has a ``safe_capacity`` field.
    """
    assert request.IsInitialized()

    # NOTE(review): timer is never used after this line in the visible
    # code; the statements that use it appear to be missing -- confirm.
    timer = Gauge.get('server.DiscoveryRPC.latency')
    logger.info(
        '%s handling Discovery RPC from %s' %
        (self.server_id, request.client_id))
    response = DiscoveryResponse()

    # Sets the master_bns field in the response if there is a current
    # master. Without one the field is simply left unset.
    master = self.job.get_master()

    if master:
      response.master_bns = master.get_server_id()

    # Goes through the resource ids in the request and sets the
    # safe capacity for every resource that has a safe capacity
    # configured.
    for r in request.resource_id:
      t = global_config.find_resource_template(r)

      if t and t.HasField('safe_capacity'):
        safe = response.safe_capacity.add()
        safe.resource_id = r
        safe.safe_capacity = t.safe_capacity

    assert response.IsInitialized()

    return response

  # Figured out when to execute the next _get_capacity call. The interval is determined by the
  # refresh_interval settings of the resources in the state.
  def _renew_capacity_interval(self):
    # Figures out the smallest refresh_interval in the server state.
    delay = sys.maxint

    for resource in self.state.all_resources():
      if resource.HasField('has'):
        delay = min(delay, resource.has.refresh_interval)

    # If that delay is highly improbable we have some error and we use
    # a default delay. This might for instance happen if all resources
    # have lost their (or never gotten any) leases.
    if delay <= 0 or delay == sys.maxint:
          '%s improbable delay %d, set to %d instead' %
          (self.server_id, delay, _kDefaultRefreshInterval))
      delay = _kDefaultRefreshInterval

    return delay

  # Get some capacity from the master downstream server.
  def _get_capacity_downstream(self):
    # Calls GetServerCapacity_RPC on self.master; returns False when the
    # response is falsy and True otherwise.
    # NOTE(review): the call below is cut off after its opening
    # parenthesis in this copy; its argument(s) are missing and must be
    # restored from the original source before this can run.
    response = self.master.GetServerCapacity_RPC(

    # Did the RPC fail?
    if not response:
      return False

    # Work the response into the state.
    # NOTE(review): no statement follows the comment above in this copy;
    # the code that updates the state appears to be missing.

    return True

  # Get some capacity for this server to hand out. Returns a boolean to
  # indicate whether this succeeded or failed.
  def _get_capacity(self):
    # Obtains capacity for this server (which must be the master) and
    # returns the success flag.
    # NOTE(review): several statements in this method are truncated in
    # this copy (marked below); it does not parse as shown.
    assert self.is_master()

    now = clock.get_time()

    # Assume the worst... :-)
    success = False

    # If we are server level 0, we need to get the capacity from the
    # configuration.
    if self.server_level == 0:
      for resource in self.state.all_resources():
        algo = AlgorithmImpl.create(resource.template, self.server_level)
            # NOTE(review): the line below is the tail of a statement
            # whose beginning is missing.
            algo.create_lease(resource, resource.template.capacity))

        # Note, we set a refresh interval here even though the capacity we get from the
        # configuration lasts forever. However by setting a refresh interval and relatively
        # short leases we ensure that configuration changes (e.g. from CDD) are
        # picked up.
        resource.has.refresh_interval *= 2

      success = True
      # If this is not the root server it gets its capacity from
      # a downstream server.
      # NOTE(review): as shown, the assignment below also runs for level 0
      # and overwrites success; the comment above suggests it belonged to
      # a separate non-root branch -- confirm.
      success = self._get_capacity_downstream()

    logger.info('%s resource state after getting capacity:' % self.server_id)

    for resource in self.state.all_resources():
          # NOTE(review): the call that takes this message is missing.
          'resource: %s got: %lf lease: %d refresh: %d' %
          (resource.resource_id, resource.has.capacity,
           resource.has.expiry_time - now, resource.has.refresh_interval))

    return success

  # Implements the GetServerCapacity RPC.
  def GetServerCapacity_RPC(self, request):
    # Handles a GetServerCapacity request. Returns None when this server
    # is not the master; otherwise returns a GetServerCapacityResponse
    # built from request.resource.
    # NOTE(review): many statements in this method are truncated or
    # missing in this copy (logger calls without their opening line,
    # assignments cut after "(", empty loop/if bodies). The comments below
    # only describe what is still visible.
    assert request.IsInitialized()
    assert self.state.is_initialized()

    # Only the master can handle this RPC.
    if not self.is_master():
          # NOTE(review): the call that this message string belonged to is
          # missing, as is the string's format argument.
          '%s getting a GetServerCapacity request when not master' %


      return None

    # NOTE(review): gauge is not used again in the visible code.
    gauge = Gauge.get('server.GetServerCapacity_RPC.latency')
    now = clock.get_time()

    # Cleans the state. This removes resources and clients with expired
    # leases and such.
    # NOTE(review): no cleanup statement follows in this copy.

    # A set of resources that we need to skip in step 2 (the actual
    # handing out of capacity).
    resources_to_skip = set()

    # First step: Go through the request and update the state with the
    # information from the request.
    for req in request.resource:
      # NOTE(review): the right-hand side of this assignment is missing.
      (resource, sr) = (

      # If this resource does not exist we don't need to do anything right now.
      if resource:
        assert sr

        # Checks whether the last request from this server was at least
        # _kMinimumInterval seconds ago.
        # NOTE(review): the statement that starts the message below is
        # missing.
        if sr.HasField('last_request_time') and now - sr.last_request_time < _kMinimumInterval:
              '%s GetServerCapacity request for resource %s within the %d '
              'second threshold' %
              (self.server_id, req.resource_id, _kMinimumInterval))
          # Updates the state with the information in the request.
          sr.last_request_time = now
          sr.outstanding = req.outstanding
          del sr.wants[:]

          # NOTE(review): the bodies of the loop and the if below are
          # missing.
          for w in req.wants:

          if req.HasField('has'):

    # Creates a new response object in which we will insert the response for
    # the resources contained in the request.
    response = GetServerCapacityResponse()

    # Step 2: Loop through all the individual resource requests in the request
    # and hand out capacity.
    for req in request.resource:
      # If this is a resource we need to skip, let's skip it.
      # NOTE(review): the body of this if is missing.
      if req.resource_id in resources_to_skip:

      # Finds the resource and the client state for this resource.
      (resource, sr) = (

      # Adds a response proto to the overall response.
      resp = response.resource.add()
      resp.resource_id = req.resource_id

      # If this is an unknown resource just give the client whatever it
      # is asking for.
      if not resource:
        assert not sr

            '%s GetServerCapacity request for unmanaged resource %s' %
            (self.server_id, req.resource_id))
        resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
        resp.gets.capacity = req.wants
        # NOTE(review): the lines from here on read like the
        # known-resource branch; the line separating the two branches is
        # not present in this copy.
        # Finds the algorithm implementation object for this resource.
        algo = AlgorithmImpl.create(resource.template, self.server_level)

        # If the resource is in learning mode we just return whatever the client
        # has now and create a default lease.
        if resource.learning_mode_expiry_time >= now:
          if sr.HasField('has'):
            has_now = sr.has.capacity
            has_now = 0

          sr.has.CopyFrom(algo.create_lease(resource, has_now))
          # Otherwise we just run the algorithm. This will update the
          # client state object.
          algo.run_server(resource, sr)

        # Copies the output from the algorithm run into the response.
        # NOTE(review): no copy statement follows in this copy.

      assert resp.IsInitialized()
          '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d' %
          (self.server_id, request.server_id, req.resource_id,
           sum([w.wants for w in req.wants]), resp.gets.capacity,
           resp.gets.expiry_time - now, resp.gets.refresh_interval))

    assert response.IsInitialized()


    return response

  # Implements the GetCapacity RPC.
  def GetCapacity_RPC(self, request):
    # Handles a GetCapacity request from a client. Returns None when this
    # server is not the master; otherwise returns a GetCapacityResponse
    # built from request.resource.
    # NOTE(review): many statements in this method are truncated or
    # missing in this copy (logger calls without their opening line,
    # assignments cut after "(", empty if bodies). The comments below only
    # describe what is still visible.
    assert request.IsInitialized()
    assert self.state.is_initialized()

    # If this server is not the master it cannot handle this request.
    # The client should do a new Discovery.
    if not self.is_master():
      # NOTE(review): the format argument of this call is missing.
      logger.info('%s getting a GetCapacity request when not master' %

      return None

    # NOTE(review): timer is not used again in the visible code.
    timer = Gauge.get('server.GetCapacity_RPC.latency')
    now = clock.get_time()

    # Cleanup the state. This removes resources and clients with expired
    # leases and such.
    # NOTE(review): no cleanup statement follows in this copy.

    # A set of resources that we need to skip in step 2 (the actual
    # handing out of capacity).
    resources_to_skip = set()

    # First step: Go through the request and update the state with the
    # information from the request.
    for req in request.resource:
      # Finds the resource and the client state for this resource.
      # NOTE(review): the arguments of this call are missing.
      (resource, cr) = self.state.find_client_resource(

      # If this resource does not exist we don't need to do anything
      # right now.
      if resource:
        assert cr

        # Checks whether the last request from this client was at least
        # _kMinimumInterval seconds ago.
        # NOTE(review): the statement that starts the message below is
        # missing.
        if cr.HasField('last_request_time') and now - cr.last_request_time < _kMinimumInterval:
              '%s GetCapacity request for resource %s within the %d second '
              'threshold' %
              (self.server_id, req.resource_id, _kMinimumInterval))
          # Updates the state with the information in the request.
          cr.last_request_time = now
          cr.priority = req.priority
          cr.wants = req.wants

          # NOTE(review): the body of this if is missing.
          if req.HasField('has'):

    # Creates a new response object in which we will insert the responses for
    # the resources contained in the request.
    response = GetCapacityResponse()

    # Step 2: Loop through all the individual resource requests in the request
    # and hand out capacity.
    for req in request.resource:
      # If this is a resource we need to skip, let's skip it.
      # NOTE(review): the body of this if is missing.
      if req.resource_id in resources_to_skip:

      # Finds the resource and the client state for this resource.
      (resource, cr) = (

      # Adds a response proto to the overall response.
      resp = response.response.add()
      resp.resource_id = req.resource_id

      # If this is an unknown resource just give the client whatever it
      # is asking for.
      if not resource:
        assert not cr

            '%s GetCapacity request for unmanaged resource %s' %
            (self.server_id, req.resource_id))
        resp.gets.expiry_time = now + _kDefaultLeaseTimeForUnknownResources
        resp.gets.capacity = req.wants
        # NOTE(review): the lines from here on read like the
        # known-resource branch; the line separating the two branches is
        # not present in this copy.
        # Sets the safe capacity in the response if there is one
        # configured for this resource.
        if resource.template.HasField('safe_capacity'):
          resp.safe_capacity = resource.template.safe_capacity

        # Finds the algorithm implementation object for this resource.
        algo = AlgorithmImpl.create(resource.template, self.server_level)

        # If the resource is in learning mode we just return whatever the client
        # has now and create a default lease.
        if resource.learning_mode_expiry_time >= now:
          if cr.HasField('has'):
            has_now = cr.has.capacity
            has_now = 0

          cr.has.CopyFrom(algo.create_lease(resource, has_now))
          # Otherwise we just run the algorithm. This will update the
          # client state object.
          algo.run_client(resource, cr)

        # Copies the output from the algorithm run into the response.
        # NOTE(review): no copy statement follows in this copy.

      assert resp.IsInitialized()
          '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d' %
          (self.server_id, request.client_id, req.resource_id, req.wants,
           resp.gets.capacity, resp.gets.expiry_time - now,

    assert response.IsInitialized()


    return response

  # This is the main function of the pseudo-thread. It needs to
  # figure out what needs to be done, then do it, and return
  # the timestamp when the next action needs to be scheduled.
  def thread_continue(self):
    """Run one step of the pseudo-thread and return the next delay.

    Returns:
      ``_kTheEndOfTime`` when this server is not the master,
      ``_kDefaultDiscoveryInterval`` when a needed discovery found no
      master, 0 when getting capacity failed, and otherwise the result of
      ``_renew_capacity_interval()``.
    """
    # Non-masters have nothing to do; the interval is updated when this
    # server becomes the master.
    if not self.is_master():
      return _kTheEndOfTime

    # Only non-root servers that do not know the downstream master need a
    # discovery. If it fails, try again in the near future.
    needs_discovery = self.server_level > 0 and not self.master
    if needs_discovery and not self._discover():
      return _kDefaultDiscoveryInterval

    # Either we know the master or we are the root server: get some
    # capacity and report when the leases need refreshing.
    if self._get_capacity():
      return self._renew_capacity_interval()

    # Getting capacity failed: forget the master so that a new discovery
    # is done, and run again right away.
    self.master = None
    return 0