async def get_component_metrics(
    tmanager,
    component: str,
    metric_names: List[str],
    instances: List[str],
    interval: int,
) -> ComponentMetrics:
    """
    Return metrics from the Tmanager over the given interval.

    The ``metrics`` property of the result is a nested mapping of
    ``metric name -> instance id -> value`` built from the response.
    Metrics not included in `metric_names` will be truncated.

    Args:
        tmanager: Tmanager location; must have truthy ``host`` and
            ``stats_port`` attributes.
        component: Component name placed in the request.
        metric_names: Names of the metrics to request.
        instances: Instance ids to add to the request; when empty, no
            instance ids are added.
        interval: Value copied into the request's ``interval`` field.

    Raises:
        Exception: If no usable Tmanager is given, or the stats endpoint
            answers with an HTTP status of 400 or above.
    """
    if not (tmanager and tmanager.host and tmanager.stats_port):
        raise Exception("No Tmanager found")

    metric_request = tmanager_pb2.MetricRequest()
    metric_request.component_name = component
    if instances:
        metric_request.instance_id.extend(instances)
    metric_request.metric.extend(metric_names)
    metric_request.interval = interval

    url = f"http://{tmanager.host}:{tmanager.stats_port}/stats"
    async with httpx.AsyncClient() as client:
        response = await client.post(url, data=metric_request.SerializeToString())

    # An error response does not carry a MetricResponse payload, so fail
    # explicitly instead of trying to parse it.
    if response.status_code >= 400:
        raise Exception(
            f"Error in getting metrics from Tmanager, code: {response.status_code}"
        )

    metric_response = tmanager_pb2.MetricResponse()
    metric_response.ParseFromString(response.content)

    # A NOTOK status is only logged; whatever metrics came back are still returned.
    if metric_response.status.status == common_pb2.NOTOK:
        if metric_response.status.HasField("message"):
            Log.warn("Received response from Tmanager: %s", metric_response.status.message)

    metrics = {}
    for metric in metric_response.metric:
        instance = metric.instance_id
        for instance_metric in metric.metric:
            metrics.setdefault(instance_metric.name, {})[instance] = instance_metric.value

    return ComponentMetrics(
        interval=metric_response.interval,
        component=component,
        metrics=metrics,
    )
def initializeFromRC(cls, rcfile):
    """
    Populate ``cls.cmdmap`` from an RC file, once.

    Does nothing when ``cls.cmdmap`` already has entries. Otherwise reads
    `rcfile` (or ``HERON_RC`` when `rcfile` is None) line by line. Every
    line matching ``heron_command_pattern`` contributes its arguments to
    ``cls.cmdmap[app][command][env]``; arguments for a repeated key are
    appended, separated by a single space.

    Args:
        cls: Class holding the ``cmdmap`` mapping and ``remove_comments``.
        rcfile: Path of the RC file, or None to use ``HERON_RC``.
    """
    if len(cls.cmdmap) > 0:
        return

    effective_rc = HERON_RC if rcfile is None else rcfile
    Log.debug('Effective RC file is %s', effective_rc)

    if not os.path.exists(effective_rc):
        Log.debug("%s is not an existing file", effective_rc)
        return

    with open(effective_rc) as f:
        # Seed the wildcard entry before reading any line.
        cls.cmdmap['*']['*'] = collections.defaultdict(dict)
        cls.cmdmap['*']['*']['*'] = ''

        for line in f:
            m = heron_command_pattern.match(line)
            if m is None:
                continue

            value = cls.remove_comments(m.group(4).rstrip(os.linesep))
            # Each key part is normalised from its OWN group: a missing or
            # empty group becomes ''.
            app = m.group(1) or ''
            command = m.group(2) or ''
            env = m.group(3) or ''

            # make sure that all the single args have a boolean value
            # associated so that we can load the args to a key value
            # structure
            args_list = config.insert_bool_values(value.split())
            args_list_string = ' '.join(args_list)

            if not command or not app or not env:
                Log.warn(
                    "heronrc config entry %s does not have key parameters (command:app:env) ",
                    line)
                continue

            if app not in cls.cmdmap:
                cls.cmdmap[app] = collections.defaultdict(dict)

            # setdefault works whether the per-app mapping is a plain dict or
            # a defaultdict, so a new command never raises KeyError.
            env_args = cls.cmdmap[app].setdefault(command, {})
            if env in env_args:
                env_args[env] = env_args[env] + ' ' + args_list_string
            else:
                env_args[env] = args_list_string

    Log.debug("RC cmdmap %s", json.dumps(cls.cmdmap))
def initializeFromRC(cls, rcfile):
    """
    Populate ``cls.cmdmap`` from an RC file, once.

    Does nothing when ``cls.cmdmap`` already has entries. Otherwise reads
    `rcfile` (or ``HERON_RC`` when `rcfile` is None) line by line. Every
    line matching ``heron_command_pattern`` contributes its arguments to
    ``cls.cmdmap[app][command][env]``; arguments for a repeated key are
    appended, separated by a single space.

    Args:
        cls: Class holding the ``cmdmap`` mapping and ``remove_comments``.
        rcfile: Path of the RC file, or None to use ``HERON_RC``.
    """
    if len(cls.cmdmap) > 0:
        return

    effective_rc = HERON_RC if rcfile is None else rcfile
    Log.debug('Effective RC file is %s', effective_rc)

    if not os.path.exists(effective_rc):
        Log.debug("%s is not an existing file", effective_rc)
        return

    with open(effective_rc) as f:
        # Seed the wildcard entry before reading any line.
        cls.cmdmap['*']['*'] = collections.defaultdict(dict)
        cls.cmdmap['*']['*']['*'] = ''

        for line in f:
            m = heron_command_pattern.match(line)
            if m is None:
                continue

            value = cls.remove_comments(m.group(4).rstrip(os.linesep))
            # Each key part is normalised from its OWN group: a missing or
            # empty group becomes ''.
            app = m.group(1) or ''
            command = m.group(2) or ''
            env = m.group(3) or ''

            # make sure that all the single args have a boolean value
            # associated so that we can load the args to a key value
            # structure
            args_list = config.insert_bool_values(value.split())
            args_list_string = ' '.join(args_list)

            if not command or not app or not env:
                Log.warn(
                    "heronrc config entry %s does not have key parameters (command:app:env) ",
                    line)
                continue

            if app not in cls.cmdmap:
                cls.cmdmap[app] = collections.defaultdict(dict)

            # setdefault works whether the per-app mapping is a plain dict or
            # a defaultdict, so a new command never raises KeyError.
            env_args = cls.cmdmap[app].setdefault(command, {})
            if env in env_args:
                env_args[env] = env_args[env] + ' ' + args_list_string
            else:
                env_args[env] = args_list_string

    Log.debug("RC cmdmap %s", json.dumps(cls.cmdmap))
def getComponentMetrics(self,
                        tmaster,
                        componentName,
                        metricNames,
                        instances,
                        interval,
                        callback=None):
    """
    Get the specified metrics for the given component name of this topology.

    Written as a generator-style coroutine: it yields the HTTP fetch and
    hands its result back through ``tornado.gen.Return``.

    Returns the following dict on success:
    {
      "metrics": {
        <metricname>: {
          <instance>: <numeric value>,
          <instance>: <numeric value>,
          ...
        }, ...
      },
      "interval": <numeric value>,
      "component": "..."
    }

    Args:
        tmaster: Tmaster location; must have truthy ``host`` and
            ``stats_port`` attributes.
        componentName: Component name placed in the request.
        metricNames: Names of the metrics to request.
        instances: Instance ids to add to the request; when empty, no
            instance ids are added.
        interval: Value copied into the request's ``interval`` field.
        callback: Not used by this function.

    Raises:
        Exception: If no usable Tmaster is given, the HTTP fetch fails, or
            the response code is 400 or above.
    """
    if not tmaster or not tmaster.host or not tmaster.stats_port:
        raise Exception("No Tmaster found")

    host = tmaster.host
    port = tmaster.stats_port

    metricRequest = tmaster_pb2.MetricRequest()
    metricRequest.component_name = componentName
    if len(instances) > 0:
        metricRequest.instance_id.extend(instances)
    metricRequest.metric.extend(metricNames)
    metricRequest.interval = interval

    # Serialize the metricRequest to send as a payload
    # with the HTTP request.
    metricRequestString = metricRequest.SerializeToString()

    url = "http://{0}:{1}/stats".format(host, port)
    request = tornado.httpclient.HTTPRequest(url,
                                             body=metricRequestString,
                                             method='POST',
                                             request_timeout=5)

    Log.debug("Making HTTP call to fetch metrics")
    Log.debug("url: %s", url)
    try:
        client = tornado.httpclient.AsyncHTTPClient()
        result = yield client.fetch(request)
        Log.debug("HTTP call complete.")
    except tornado.httpclient.HTTPError as e:
        raise Exception(str(e))

    # Check the response code - error if it is in 400s or 500s
    responseCode = result.code
    if responseCode >= 400:
        # responseCode is an int, so it must be formatted rather than
        # concatenated to the message.
        message = "Error in getting metrics from Tmaster, code: %s" % responseCode
        Log.error(message)
        raise Exception(message)

    # Parse the response from tmaster.
    metricResponse = tmaster_pb2.MetricResponse()
    metricResponse.ParseFromString(result.body)

    # A NOTOK status is only logged; the metrics received are still returned.
    if metricResponse.status.status == common_pb2.NOTOK:
        if metricResponse.status.HasField("message"):
            Log.warn("Received response from Tmaster: %s", metricResponse.status.message)

    # Form the response.
    ret = {}
    ret["interval"] = metricResponse.interval
    ret["component"] = componentName
    ret["metrics"] = {}
    for metric in metricResponse.metric:
        instance = metric.instance_id
        for im in metric.metric:
            ret["metrics"].setdefault(im.name, {})[instance] = im.value

    raise tornado.gen.Return(ret)
def getMetricsTimeline(tmaster,
                       component_name,
                       metric_names,
                       instances,
                       start_time,
                       end_time,
                       callback=None):
    """
    Get the specified metrics for the given component name of this topology.

    Written as a generator-style coroutine: it yields the HTTP fetch and
    hands its result back through ``tornado.gen.Return``.

    Returns the following dict on success:
    {
      "timeline": {
        <metricname>: {
          <instance>: {
            <start_time> : <numeric value>,
            <start_time> : <numeric value>,
            ...
          }
          ...
        }, ...
      },
      "starttime": <numeric value>,
      "endtime": <numeric value>,
      "component": "..."
    }

    Args:
        tmaster: Tmaster location; must have truthy ``host`` and
            ``stats_port`` attributes.
        component_name: Component name placed in the request.
        metric_names: Names of the metrics to request.
        instances: Instance ids to add to the request; when empty, no
            instance ids are added.
        start_time: Start of the explicit interval in the request.
        end_time: End of the explicit interval in the request.
        callback: Not used by this function.

    Raises:
        Exception: If no usable Tmaster is given, the HTTP fetch fails, or
            the response code is 400 or above.
    """
    # Tmaster is the proto object and must have host and port for stats.
    if not tmaster or not tmaster.host or not tmaster.stats_port:
        raise Exception("No Tmaster found")

    host = tmaster.host
    port = tmaster.stats_port

    # Create the proto request object to get metrics.
    metricRequest = tmaster_pb2.MetricRequest()
    metricRequest.component_name = component_name

    # If no instances are given, metrics for all instances
    # are fetched by default.
    if len(instances) > 0:
        metricRequest.instance_id.extend(instances)
    metricRequest.metric.extend(metric_names)

    metricRequest.explicit_interval.start = start_time
    metricRequest.explicit_interval.end = end_time
    metricRequest.minutely = True

    # Serialize the metricRequest to send as a payload
    # with the HTTP request.
    metricRequestString = metricRequest.SerializeToString()

    # Form and send the http request.
    url = "http://{0}:{1}/stats".format(host, port)
    request = tornado.httpclient.HTTPRequest(url,
                                             body=metricRequestString,
                                             method='POST',
                                             request_timeout=5)

    Log.debug("Making HTTP call to fetch metrics")
    Log.debug("url: %s", url)
    try:
        client = tornado.httpclient.AsyncHTTPClient()
        result = yield client.fetch(request)
        Log.debug("HTTP call complete.")
    except tornado.httpclient.HTTPError as e:
        raise Exception(str(e))

    # Check the response code - error if it is in 400s or 500s
    responseCode = result.code
    if responseCode >= 400:
        # responseCode is an int, so it must be formatted rather than
        # concatenated to the message.
        message = "Error in getting metrics from Tmaster, code: %s" % responseCode
        Log.error(message)
        raise Exception(message)

    # Parse the response from tmaster.
    metricResponse = tmaster_pb2.MetricResponse()
    metricResponse.ParseFromString(result.body)

    # A NOTOK status is only logged; the metrics received are still returned.
    if metricResponse.status.status == common_pb2.NOTOK:
        if metricResponse.status.HasField("message"):
            Log.warn("Received response from Tmaster: %s", metricResponse.status.message)

    # Form the response.
    ret = {}
    ret["starttime"] = start_time
    ret["endtime"] = end_time
    ret["component"] = component_name
    ret["timeline"] = {}

    # Loop through all the metrics
    # One instance corresponds to one metric, which can have
    # multiple IndividualMetrics for each metricname requested.
    for metric in metricResponse.metric:
        instance = metric.instance_id

        # Loop through all individual metrics.
        for im in metric.metric:
            points = ret["timeline"].setdefault(im.name, {}).setdefault(instance, {})

            # We get minutely metrics.
            # Each interval value is stored under the start of its interval.
            for interval_value in im.interval_values:
                points[interval_value.interval.start] = interval_value.value

    raise tornado.gen.Return(ret)
async def get_metrics_timeline(
    tmanager: tmanager_pb2.TManagerLocation,
    component_name: str,
    metric_names: List[str],
    instances: List[str],
    start_time: int,
    end_time: int,
    callback=None,
) -> MetricsTimeline:
    """
    Get the specified metrics for the given component name of this topology.

    The ``timeline`` of the result is a nested mapping of
    ``metric name -> instance id -> interval start -> value``.

    Args:
        tmanager: Tmanager location; must have truthy ``host`` and
            ``stats_port`` attributes.
        component_name: Component name placed in the request.
        metric_names: Names of the metrics to request.
        instances: Instance ids to add to the request.
        start_time: Start of the explicit interval in the request.
        end_time: End of the explicit interval in the request.
        callback: Not used by this function.

    Raises:
        Exception: If no usable Tmanager is given, or the stats endpoint
            answers with an HTTP status of 400 or above.
    """
    # Tmanager is the proto object and must have host and port for stats.
    if not tmanager or not tmanager.host or not tmanager.stats_port:
        raise Exception("No Tmanager found")

    # Create the proto request object to get metrics.
    request_parameters = tmanager_pb2.MetricRequest()
    request_parameters.component_name = component_name

    # If no instances are given, metrics for all instances
    # are fetched by default.
    request_parameters.instance_id.extend(instances)
    request_parameters.metric.extend(metric_names)

    request_parameters.explicit_interval.start = start_time
    request_parameters.explicit_interval.end = end_time
    request_parameters.minutely = True

    # Form and send the http request.
    url = f"http://{tmanager.host}:{tmanager.stats_port}/stats"
    Log.debug("Making HTTP call to fetch metrics: %s", url)
    async with httpx.AsyncClient() as client:
        result = await client.post(url, data=request_parameters.SerializeToString())

    # Check the response code - error if it is in 400s or 500s
    if result.status_code >= 400:
        # The httpx response exposes the code as `status_code`.
        message = f"Error in getting metrics from Tmanager, code: {result.status_code}"
        raise Exception(message)

    # Parse the response from tmanager.
    response_data = tmanager_pb2.MetricResponse()
    response_data.ParseFromString(result.content)

    # A NOTOK status is only logged; the metrics received are still returned.
    if response_data.status.status == common_pb2.NOTOK:
        if response_data.status.HasField("message"):
            Log.warn("Received response from Tmanager: %s", response_data.status.message)

    timeline = {}
    # Loop through all the metrics
    # One instance corresponds to one metric, which can have
    # multiple IndividualMetrics for each metricname requested.
    for metric in response_data.metric:
        instance = metric.instance_id

        # Loop through all individual metrics.
        for im in metric.metric:
            points = timeline.setdefault(im.name, {}).setdefault(instance, {})

            # We get minutely metrics.
            # Each interval value is stored under the start of its interval.
            for interval_value in im.interval_values:
                points[interval_value.interval.start] = interval_value.value

    return MetricsTimeline(
        starttime=start_time,
        endtime=end_time,
        component=component_name,
        timeline=timeline,
    )
def getMetricsTimeline(tmaster,
                       component_name,
                       metric_names,
                       instances,
                       start_time,
                       end_time,
                       callback=None):
    """
    Get the specified metrics for the given component name of this topology.

    Written as a generator-style coroutine: it yields the HTTP fetch and
    hands its result back through ``tornado.gen.Return``.

    Returns the following dict on success:
    {
      "timeline": {
        <metricname>: {
          <instance>: {
            <start_time> : <numeric value>,
            <start_time> : <numeric value>,
            ...
          }
          ...
        }, ...
      },
      "starttime": <numeric value>,
      "endtime": <numeric value>,
      "component": "..."
    }

    Args:
        tmaster: Tmaster location; must have truthy ``host`` and
            ``stats_port`` attributes.
        component_name: Component name placed in the request.
        metric_names: Names of the metrics to request.
        instances: Instance ids to add to the request; when empty, no
            instance ids are added.
        start_time: Start of the explicit interval in the request.
        end_time: End of the explicit interval in the request.
        callback: Not used by this function.

    Raises:
        Exception: If no usable Tmaster is given, the HTTP fetch fails, or
            the response code is 400 or above.
    """
    # Tmaster is the proto object and must have host and port for stats.
    if not tmaster or not tmaster.host or not tmaster.stats_port:
        raise Exception("No Tmaster found")

    host = tmaster.host
    port = tmaster.stats_port

    # Create the proto request object to get metrics.
    metricRequest = tmaster_pb2.MetricRequest()
    metricRequest.component_name = component_name

    # If no instances are given, metrics for all instances
    # are fetched by default.
    if len(instances) > 0:
        metricRequest.instance_id.extend(instances)
    metricRequest.metric.extend(metric_names)

    metricRequest.explicit_interval.start = start_time
    metricRequest.explicit_interval.end = end_time
    metricRequest.minutely = True

    # Serialize the metricRequest to send as a payload
    # with the HTTP request.
    metricRequestString = metricRequest.SerializeToString()

    # Form and send the http request.
    url = "http://{0}:{1}/stats".format(host, port)
    request = tornado.httpclient.HTTPRequest(url,
                                             body=metricRequestString,
                                             method='POST',
                                             request_timeout=5)

    Log.debug("Making HTTP call to fetch metrics")
    Log.debug("url: %s", url)
    try:
        client = tornado.httpclient.AsyncHTTPClient()
        result = yield client.fetch(request)
        Log.debug("HTTP call complete.")
    except tornado.httpclient.HTTPError as e:
        raise Exception(str(e))

    # Check the response code - error if it is in 400s or 500s
    responseCode = result.code
    if responseCode >= 400:
        # responseCode is an int, so it must be formatted rather than
        # concatenated to the message.
        message = "Error in getting metrics from Tmaster, code: %s" % responseCode
        Log.error(message)
        raise Exception(message)

    # Parse the response from tmaster.
    metricResponse = tmaster_pb2.MetricResponse()
    metricResponse.ParseFromString(result.body)

    # A NOTOK status is only logged; the metrics received are still returned.
    if metricResponse.status.status == common_pb2.NOTOK:
        if metricResponse.status.HasField("message"):
            Log.warn("Received response from Tmaster: %s", metricResponse.status.message)

    # Form the response.
    ret = {}
    ret["starttime"] = start_time
    ret["endtime"] = end_time
    ret["component"] = component_name
    ret["timeline"] = {}

    # Loop through all the metrics
    # One instance corresponds to one metric, which can have
    # multiple IndividualMetrics for each metricname requested.
    for metric in metricResponse.metric:
        instance = metric.instance_id

        # Loop through all individual metrics.
        for im in metric.metric:
            points = ret["timeline"].setdefault(im.name, {}).setdefault(instance, {})

            # We get minutely metrics.
            # Each interval value is stored under the start of its interval.
            for interval_value in im.interval_values:
                points[interval_value.interval.start] = interval_value.value

    raise tornado.gen.Return(ret)