def get_endpoint_metrics(self, endpoint_name):
    """Fetch the metrics blob for an endpoint from Zookeeper.

    The live metrics node is preferred; if it is empty we fall back to
    the custom metrics node. Returns the JSON-decoded value from the
    first non-empty node, or the raw falsy read result (e.g. None)
    when neither node has data.
    """
    candidate_paths = (
        paths.endpoint_live_metrics(endpoint_name),
        paths.endpoint_custom_metrics(endpoint_name),
    )
    blob = None
    for node_path in candidate_paths:
        blob = self.zk_conn.read(node_path)
        if blob:
            return json.loads(blob)
    # Both nodes empty: propagate the (falsy) raw read result,
    # matching the original fall-through behavior.
    return blob
def health_check(self):
    """Run a health check across all endpoints owned by this manager.

    For each owned endpoint: load its metrics, compute the active
    connection set and globally weighted metric averages, publish both
    to ephemeral Zookeeper nodes, run the endpoint's own health check,
    and apply a (non-reconfiguring) update. Finally, flush the
    in-memory log buffer to the manager's Zookeeper log node.
    A failure on one endpoint is logged and does not stop the others.
    """
    # Save and load the current metrics.
    endpoint_metrics, active_connections = self.update_metrics()

    # Does a health check on all the endpoints that are being managed.
    for endpoint in self.endpoints.values():
        # Do not kick the endpoint if it is not currently owned by us.
        if not self.endpoint_owned(endpoint):
            continue
        try:
            metrics, metric_ips, endpoint_connections = \
                self.load_metrics(endpoint, endpoint_metrics)

            # Compute the active set (including custom metrics, etc.).
            active = active_connections.get(endpoint.name, [])
            active.extend(endpoint_connections)
            active = list(set(active))

            # Compute the globally weighted averages.
            metrics = calculate_weighted_averages(metrics)

            # Update the live metrics and connections.
            logging.debug("Metrics for endpoint %s from %s: %s" % \
                (endpoint.name, str(metric_ips), metrics))
            self.zk_conn.write(paths.endpoint_live_metrics(endpoint.name), \
                               json.dumps(metrics), \
                               ephemeral=True)
            self.zk_conn.write(paths.endpoint_live_active(endpoint.name), \
                               json.dumps(active), \
                               ephemeral=True)

            # Run a health check on this endpoint.
            endpoint.health_check(active)

            # Do the endpoint update.
            endpoint.update(reconfigure=False,
                            metrics=metrics,
                            metric_instances=len(metric_ips),
                            active_ips=active)
        # FIX: was a bare `except:`, which also swallows SystemExit and
        # KeyboardInterrupt; narrow to Exception so shutdown signals
        # still propagate while per-endpoint failures are just logged.
        except Exception:
            error = traceback.format_exc()
            logging.error("Error updating endpoint %s: %s" %
                          (endpoint.name, error))

    # NOTE(review): the log flush is reconstructed as running once,
    # after the loop — confirm against upstream history.
    try:
        # Try updating our logs.
        self.zk_conn.write(paths.manager_log(self.uuid),
                           self.log.getvalue(),
                           ephemeral=True)
    # FIX: narrowed from a bare `except:` for the same reason as above.
    except Exception:
        error = traceback.format_exc()
        logging.error("Error saving logs: %s" % error)

    # Reset the buffer.
    self.log.truncate(0)