def find_resource(self, resource_id):
    """Return the server state entry for resource_id, creating it on demand.

    May only be called on the master (this is asserted). An entry that is
    already present in self.wrapped_state.resource is returned unchanged.
    Otherwise a new entry is added from the resource template found in the
    global configuration and its learning mode expiry time is filled in.

    Args:
      resource_id: Identifier of the resource to look up.

    Returns:
      The resource entry, or None if no template describes resource_id.
    """
    # Resource state is only maintained by the master.
    assert self.server.is_master()

    # Linear scan over the known resources; the first match wins.
    existing = next(
        (entry for entry in self.wrapped_state.resource
         if entry.resource_id == resource_id),
        None)

    if existing is not None:
        return existing

    # Unknown resource: look for the template that describes it.
    template = global_config.find_resource_template(resource_id)

    if not template:
        logger.error(
            'Cannot create server state entry for resource %s (no template)' %
            resource_id)
        return None

    # A template exists, so add a fresh entry to the server state.
    logger.info(
        '%s creating new resource %s' % (self.get_server_id(), resource_id))
    entry = self.wrapped_state.resource.add()
    entry.template.CopyFrom(template)
    entry.resource_id = resource_id

    # Learning mode ends one maximum lease duration after the election
    # victory. The resulting time may already lie in the past.
    victory_time = self.wrapped_state.election_victory_time
    algo = AlgorithmImpl.create(template, self.wrapped_state.server_level)
    entry.learning_mode_expiry_time = (
        victory_time + algo.get_max_lease_duration())

    if entry.learning_mode_expiry_time > clock.get_time():
        logger.info(
            '%s putting resource %s in learning mode until T=%d' %
            (self.get_server_id(), resource_id,
             entry.learning_mode_expiry_time))

        # Schedule an action that logs a message once this resource has
        # left learning mode.
        scheduler.add_absolute(
            entry.learning_mode_expiry_time + 1,
            _LeaveLearningMode(),
            (self.get_server_id(), entry.resource_id))

    # Note: the new entry carries no capacity lease yet; dealing with that
    # is left to the caller.
    assert entry.IsInitialized()
    return entry
def find_resource(self, resource_id):
    """Return the server state entry for resource_id, creating it on demand.

    May only be called on the master (this is asserted). An entry that is
    already present in self.wrapped_state.resource is returned unchanged.
    Otherwise a new entry is added from the resource template found in the
    global configuration and its learning mode expiry time is filled in.

    Args:
      resource_id: Identifier of the resource to look up.

    Returns:
      The resource entry, or None if no template describes resource_id.
    """
    # Resource state is only maintained by the master.
    assert self.server.is_master()

    # Linear scan over the known resources; the first match wins.
    existing = next(
        (entry for entry in self.wrapped_state.resource
         if entry.resource_id == resource_id),
        None)

    if existing is not None:
        return existing

    # Unknown resource: look for the template that describes it.
    template = global_config.find_resource_template(resource_id)

    if not template:
        logger.error(
            'Cannot create server state entry for resource %s (no template)' %
            resource_id)
        return None

    # A template exists, so add a fresh entry to the server state.
    logger.info(
        '%s creating new resource %s' % (self.get_server_id(), resource_id))
    entry = self.wrapped_state.resource.add()
    entry.template.CopyFrom(template)
    entry.resource_id = resource_id

    # Learning mode ends one maximum lease duration after the election
    # victory. The resulting time may already lie in the past.
    victory_time = self.wrapped_state.election_victory_time
    algo = AlgorithmImpl.create(template, self.wrapped_state.server_level)
    entry.learning_mode_expiry_time = (
        victory_time + algo.get_max_lease_duration())

    if entry.learning_mode_expiry_time > clock.get_time():
        logger.info(
            '%s putting resource %s in learning mode until T=%d' %
            (self.get_server_id(), resource_id,
             entry.learning_mode_expiry_time))

        # Schedule an action that logs a message once this resource has
        # left learning mode.
        scheduler.add_absolute(
            entry.learning_mode_expiry_time + 1,
            _LeaveLearningMode(),
            (self.get_server_id(), entry.resource_id))

    # Note: the new entry carries no capacity lease yet; dealing with that
    # is left to the caller.
    assert entry.IsInitialized()
    return entry
def _get_capacity(self):
    """Obtain capacity leases for all resources this master manages.

    A level 0 server takes each resource's capacity from its template; any
    other level delegates to self._get_capacity_downstream(). The resulting
    lease of every resource is logged afterwards.

    Returns:
      True for a level 0 server, otherwise whatever
      self._get_capacity_downstream() returned.
    """
    assert self.is_master()
    now = clock.get_time()

    if self.server_level != 0:
        # Not the root server: the capacity comes from a downstream server.
        success = self._get_capacity_downstream()
    else:
        # Root server: the capacity comes straight from the configuration.
        for res in self.state.all_resources():
            algo = AlgorithmImpl.create(res.template, self.server_level)

            # The old lease is cleared before the new one is created,
            # because create_lease is handed the resource itself.
            res.ClearField('has')
            new_lease = algo.create_lease(res, res.template.capacity)
            res.has.CopyFrom(new_lease)

            # Capacity from the configuration lasts forever, but a refresh
            # interval with relatively short leases makes sure that
            # configuration changes (e.g. from CDD) get picked up.
            res.has.refresh_interval *= 2

        success = True

    logger.info('%s resource state after getting capacity:' % self.server_id)

    for res in self.state.all_resources():
        lease = res.has
        logger.info(
            'resource: %s got: %lf lease: %d refresh: %d' %
            (res.resource_id, lease.capacity, lease.expiry_time - now,
             lease.refresh_interval))

    return success
def _get_capacity(self):
    """Obtain capacity leases for all resources this master manages.

    A level 0 server takes each resource's capacity from its template; any
    other level delegates to self._get_capacity_downstream(). The resulting
    lease of every resource is logged afterwards.

    Returns:
      True for a level 0 server, otherwise whatever
      self._get_capacity_downstream() returned.
    """
    assert self.is_master()
    now = clock.get_time()

    if self.server_level != 0:
        # Not the root server: the capacity comes from a downstream server.
        success = self._get_capacity_downstream()
    else:
        # Root server: the capacity comes straight from the configuration.
        for res in self.state.all_resources():
            algo = AlgorithmImpl.create(res.template, self.server_level)

            # The old lease is cleared before the new one is created,
            # because create_lease is handed the resource itself.
            res.ClearField('has')
            new_lease = algo.create_lease(res, res.template.capacity)
            res.has.CopyFrom(new_lease)

            # Capacity from the configuration lasts forever, but a refresh
            # interval with relatively short leases makes sure that
            # configuration changes (e.g. from CDD) get picked up.
            res.has.refresh_interval *= 2

        success = True

    logger.info('%s resource state after getting capacity:' % self.server_id)

    for res in self.state.all_resources():
        lease = res.has
        logger.info(
            'resource: %s got: %lf lease: %d refresh: %d' %
            (res.resource_id, lease.capacity, lease.expiry_time - now,
             lease.refresh_interval))

    return success
def GetCapacity_RPC(self, request):
    """Handle a GetCapacity request from a client.

    The request is processed in two passes: the first records what the
    client reports per resource in the server state, the second hands out
    capacity and builds the response.

    Args:
      request: An initialized GetCapacity request holding a client_id and
        one entry per requested resource.

    Returns:
      A GetCapacityResponse with one entry per handled resource, or None
      when this server is not the master.
    """
    assert request.IsInitialized()
    assert self.state.is_initialized()

    # Only the master can answer; the client should do a new Discovery.
    if not self.is_master():
        self.state.assert_clean()
        logger.info('%s getting a GetCapacity request when not master' %
                    self.server_id)
        Counter.get('server.GetCapacity_RPC.not_master').inc()
        return None

    latency = Gauge.get('server.GetCapacity_RPC.latency')
    latency.start_timer()
    logger.debug(request)
    now = clock.get_time()

    # Drop resources and clients with expired leases and such.
    self.state.cleanup()

    # Resources for which the second pass must not hand out capacity.
    throttled = set()

    # Pass 1: update the state with the information from the request.
    for item in request.resource:
        resource, client_state = self.state.find_client_resource(
            request.client_id, item.resource_id)

        # Nothing to record for a resource that does not exist.
        if not resource:
            continue

        assert client_state

        # The previous request from this client must be at least
        # _kMinimumInterval seconds old.
        too_soon = (
            client_state.HasField('last_request_time') and
            now - client_state.last_request_time < _kMinimumInterval)

        if too_soon:
            logger.warning(
                '%s GetCapacity request for resource %s within the %d second '
                'threshold' %
                (self.server_id, item.resource_id, _kMinimumInterval))
            throttled.add(item.resource_id)
            continue

        # Record what the client told us.
        client_state.last_request_time = now
        client_state.priority = item.priority
        client_state.wants = item.wants

        if item.HasField('has'):
            client_state.has.CopyFrom(item.has)
        else:
            client_state.ClearField('has')

    # The per resource answers are collected in this response object.
    response = GetCapacityResponse()

    # Pass 2: hand out capacity for every resource that is not skipped.
    for item in request.resource:
        if item.resource_id in throttled:
            continue

        resource, client_state = self.state.find_client_resource(
            request.client_id, item.resource_id)

        answer = response.response.add()
        answer.resource_id = item.resource_id

        if not resource:
            # Unknown resource: the client simply gets what it asks for.
            assert not client_state
            logger.warning(
                '%s GetCapacity request for unmanaged resource %s' %
                (self.server_id, item.resource_id))
            answer.gets.expiry_time = (
                now + _kDefaultLeaseTimeForUnknownResources)
            answer.gets.capacity = item.wants
        else:
            # Pass on the safe capacity if one is configured.
            if resource.template.HasField('safe_capacity'):
                answer.safe_capacity = resource.template.safe_capacity

            algo = AlgorithmImpl.create(resource.template, self.server_level)

            if resource.learning_mode_expiry_time >= now:
                # Learning mode: hand back whatever the client has now in
                # a default lease.
                has_now = (
                    client_state.has.capacity
                    if client_state.HasField('has') else 0)
                client_state.has.CopyFrom(
                    algo.create_lease(resource, has_now))
                Counter.get('server.learning_mode_response').inc()
            else:
                # Normal operation: the algorithm updates the client state.
                algo.run_client(resource, client_state)
                Counter.get('server.algorithm_runs').inc()

            # The lease now stored in the client state is the answer.
            answer.gets.CopyFrom(client_state.has)

        assert answer.IsInitialized()
        logger.info(
            '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d'
            % (self.server_id, request.client_id, item.resource_id,
               item.wants, answer.gets.capacity,
               answer.gets.expiry_time - now, answer.gets.refresh_interval))

    assert response.IsInitialized()
    latency.stop_timer()
    return response
def GetCapacity_RPC(self, request):
    """Handle a GetCapacity request from a client.

    The request is processed in two passes: the first records what the
    client reports per resource in the server state, the second hands out
    capacity and builds the response.

    Args:
      request: An initialized GetCapacity request holding a client_id and
        one entry per requested resource.

    Returns:
      A GetCapacityResponse with one entry per handled resource, or None
      when this server is not the master.
    """
    assert request.IsInitialized()
    assert self.state.is_initialized()

    # Only the master can answer; the client should do a new Discovery.
    if not self.is_master():
        self.state.assert_clean()
        logger.info('%s getting a GetCapacity request when not master' %
                    self.server_id)
        Counter.get('server.GetCapacity_RPC.not_master').inc()
        return None

    latency = Gauge.get('server.GetCapacity_RPC.latency')
    latency.start_timer()
    logger.debug(request)
    now = clock.get_time()

    # Drop resources and clients with expired leases and such.
    self.state.cleanup()

    # Resources for which the second pass must not hand out capacity.
    throttled = set()

    # Pass 1: update the state with the information from the request.
    for item in request.resource:
        resource, client_state = self.state.find_client_resource(
            request.client_id, item.resource_id)

        # Nothing to record for a resource that does not exist.
        if not resource:
            continue

        assert client_state

        # The previous request from this client must be at least
        # _kMinimumInterval seconds old.
        too_soon = (
            client_state.HasField('last_request_time') and
            now - client_state.last_request_time < _kMinimumInterval)

        if too_soon:
            logger.warning(
                '%s GetCapacity request for resource %s within the %d second '
                'threshold' %
                (self.server_id, item.resource_id, _kMinimumInterval))
            throttled.add(item.resource_id)
            continue

        # Record what the client told us.
        client_state.last_request_time = now
        client_state.priority = item.priority
        client_state.wants = item.wants

        if item.HasField('has'):
            client_state.has.CopyFrom(item.has)
        else:
            client_state.ClearField('has')

    # The per resource answers are collected in this response object.
    response = GetCapacityResponse()

    # Pass 2: hand out capacity for every resource that is not skipped.
    for item in request.resource:
        if item.resource_id in throttled:
            continue

        resource, client_state = self.state.find_client_resource(
            request.client_id, item.resource_id)

        answer = response.response.add()
        answer.resource_id = item.resource_id

        if not resource:
            # Unknown resource: the client simply gets what it asks for.
            assert not client_state
            logger.warning(
                '%s GetCapacity request for unmanaged resource %s' %
                (self.server_id, item.resource_id))
            answer.gets.expiry_time = (
                now + _kDefaultLeaseTimeForUnknownResources)
            answer.gets.capacity = item.wants
        else:
            # Pass on the safe capacity if one is configured.
            if resource.template.HasField('safe_capacity'):
                answer.safe_capacity = resource.template.safe_capacity

            algo = AlgorithmImpl.create(resource.template, self.server_level)

            if resource.learning_mode_expiry_time >= now:
                # Learning mode: hand back whatever the client has now in
                # a default lease.
                has_now = (
                    client_state.has.capacity
                    if client_state.HasField('has') else 0)
                client_state.has.CopyFrom(
                    algo.create_lease(resource, has_now))
                Counter.get('server.learning_mode_response').inc()
            else:
                # Normal operation: the algorithm updates the client state.
                algo.run_client(resource, client_state)
                Counter.get('server.algorithm_runs').inc()

            # The lease now stored in the client state is the answer.
            answer.gets.CopyFrom(client_state.has)

        assert answer.IsInitialized()
        logger.info(
            '%s for %s resource: %s wants: %lf gets: %lf lease: %d refresh: %d'
            % (self.server_id, request.client_id, item.resource_id,
               item.wants, answer.gets.capacity,
               answer.gets.expiry_time - now, answer.gets.refresh_interval))

    assert response.IsInitialized()
    latency.stop_timer()
    return response