def _rebuild_heartbeat(self):
    """(Re)create the heartbeat gauge so its label keys and values match
    the current contents of ``self.properties``.

    Extracted because the identical construction code previously appeared
    verbatim in two branches of ``get_metrics``.
    """
    self.heartbeat = LongGauge(
        HeartbeatMetric.NAME,
        'Heartbeat metric with custom dimensions',
        'count',
        list(self.properties.keys()),
    )
    self.heartbeat.get_or_create_time_series(
        list(self.properties.values()))

def get_metrics(self):
    """Return the heartbeat metric as a single-element list.

    Lazily initializes properties and the gauge on first call. If a
    previous Azure VM metadata request signalled a retry, attempt the
    request again and rebuild the gauge on success (the gauge must be
    recreated because its label keys change when VM data is added).

    :return: list with one metric, or ``[]`` if no gauge exists yet.
    """
    if not self.init:
        self._init_properties()
        self._rebuild_heartbeat()
        self.init = True
    elif self.vm_retry:
        # Only need to possibly update if vm retry.
        # _get_azure_compute_metadata() clears vm_retry on success.
        if self._get_azure_compute_metadata() and not self.vm_retry:
            self._populate_vm_data()
            # Recreate the metric to initialize key/values
            self._rebuild_heartbeat()
    if self.heartbeat:
        return [self.heartbeat.get_metric(datetime.datetime.utcnow())]
    return []
def __init__(self):
    """Collect environment properties and build the heartbeat gauge.

    The gauge's label keys/values are derived from ``self.properties``,
    which ``update_properties`` fills based on the hosting environment.
    """
    self.vm_data = {}
    self.is_vm = False
    self.properties = OrderedDict()
    self.update_properties()
    label_keys = list(self.properties.keys())
    label_values = list(self.properties.values())
    self.heartbeat = LongGauge(
        HeartbeatMetric.NAME,
        'Heartbeat metric with custom dimensions',
        'count',
        label_keys,
    )
    self.heartbeat.get_or_create_time_series(label_values)
def get_metrics(self):
    """Return the heartbeat metric as a single-element list.

    When running on an Azure VM the environment-derived properties may
    change, so they are refreshed and the gauge is rebuilt (its label
    keys are fixed at construction time) before the metric is read.
    """
    if self.is_vm:
        # Only need to update if in vm (properties could change)
        self.properties.clear()
        self.update_properties()
        gauge = LongGauge(
            HeartbeatMetric.NAME,
            'Heartbeat metric with custom dimensions',
            'count',
            list(self.properties.keys()),
        )
        gauge.get_or_create_time_series(list(self.properties.values()))
        self.heartbeat = gauge
    return [self.heartbeat.get_metric(datetime.datetime.utcnow())]
def __init__(self, options):
    """Initialize statsbeat state and the gauges it exports.

    :param options: exporter options; ``instrumentation_key``,
        ``enable_local_storage`` and ``credential`` are read here.
    """
    self._options = options
    self._instrumentation_key = options.instrumentation_key
    # Bit flags describing which optional features are enabled
    self._feature = _StatsbeatFeature.NONE
    if options.enable_local_storage:
        self._feature |= _StatsbeatFeature.DISK_RETRY
    if options.credential:
        self._feature |= _StatsbeatFeature.AAD
    self._stats_lock = threading.Lock()
    self._vm_data = {}
    self._vm_retry = True
    self._rp = _RP_NAMES[3]
    self._os_type = platform.system()
    # Attach metrics - metrics related to rp (resource provider)
    self._attach_metric = LongGauge(
        _ATTACH_METRIC_NAME,
        'Statsbeat metric related to rp integrations',
        'count',
        _get_attach_properties(),
    )
    # Keep track of how many iterations until long export
    self._long_threshold_count = 0
    # Network metrics - metrics related to request calls to Breeze.
    # Map of gauge function -> metric; the gauge function is the
    # callback used to populate the metric value. Built from a spec
    # table instead of six copy-pasted constructor calls.
    network_specs = [
        (_get_success_count_value, DerivedLongGauge, _REQ_SUC_COUNT_NAME,
         'Statsbeat metric tracking request success count'),
        (_get_failure_count_value, DerivedLongGauge, _REQ_FAIL_COUNT_NAME,
         'Statsbeat metric tracking request failure count'),
        (_get_average_duration_value, DerivedDoubleGauge, _REQ_DURATION_NAME,
         'Statsbeat metric tracking average request duration'),
        (_get_retry_count_value, DerivedLongGauge, _REQ_RETRY_NAME,
         'Statsbeat metric tracking request retry count'),
        (_get_throttle_count_value, DerivedLongGauge, _REQ_THROTTLE_NAME,
         'Statsbeat metric tracking request throttle count'),
        (_get_exception_count_value, DerivedLongGauge, _REQ_EXCEPTION_NAME,
         'Statsbeat metric tracking request exception count'),
    ]
    # dict preserves insertion order, so export order is unchanged
    self._network_metrics = {
        fn: gauge_cls(name, description, 'count',
                      _get_network_properties())
        for fn, gauge_cls, name, description in network_specs
    }
    # feature/instrumentation metrics
    # metrics related to what features and instrumentations are enabled
    self._feature_metric = LongGauge(
        _FEATURE_METRIC_NAME,
        'Statsbeat metric related to features enabled',  # noqa: E501
        'count',
        _get_feature_properties(),
    )
    # Instrumentation metric uses same name/properties as feature
    self._instrumentation_metric = LongGauge(
        _FEATURE_METRIC_NAME,
        'Statsbeat metric related to instrumentations enabled',  # noqa: E501
        'count',
        _get_feature_properties(),
    )
class _StatsbeatMetrics:
    """Collects and exposes "statsbeat" self-diagnostic metrics.

    Tracks three groups of metrics: attach (resource-provider)
    information, enabled features/instrumentations, and network
    request counters for calls to the Breeze ingestion endpoint.
    """

    def __init__(self, options):
        """Initialize statsbeat state and the gauges it exports.

        :param options: exporter options; ``instrumentation_key``,
            ``enable_local_storage`` and ``credential`` are read here.
        """
        self._options = options
        self._instrumentation_key = options.instrumentation_key
        # Bit flags describing which optional features are enabled
        self._feature = _StatsbeatFeature.NONE
        if options.enable_local_storage:
            self._feature |= _StatsbeatFeature.DISK_RETRY
        if options.credential:
            self._feature |= _StatsbeatFeature.AAD
        self._stats_lock = threading.Lock()
        self._vm_data = {}
        self._vm_retry = True
        # Default resource provider: "unknown" until detected
        self._rp = _RP_NAMES[3]
        self._os_type = platform.system()
        # Attach metrics - metrics related to rp (resource provider)
        self._attach_metric = LongGauge(
            _ATTACH_METRIC_NAME,
            'Statsbeat metric related to rp integrations',
            'count',
            _get_attach_properties(),
        )
        # Keep track of how many iterations until long export
        self._long_threshold_count = 0
        # Network metrics - metrics related to request calls to Breeze
        self._network_metrics = {}
        # Map of gauge function -> metric
        # Gauge function is the callback used to populate the metric value
        self._network_metrics[_get_success_count_value] = DerivedLongGauge(
            _REQ_SUC_COUNT_NAME,
            'Statsbeat metric tracking request success count',
            'count',
            _get_network_properties(),
        )
        self._network_metrics[_get_failure_count_value] = DerivedLongGauge(
            _REQ_FAIL_COUNT_NAME,
            'Statsbeat metric tracking request failure count',
            'count',
            _get_network_properties(),
        )
        self._network_metrics[
            _get_average_duration_value] = DerivedDoubleGauge(  # noqa: E501
            _REQ_DURATION_NAME,
            'Statsbeat metric tracking average request duration',
            'count',
            _get_network_properties(),
        )
        self._network_metrics[_get_retry_count_value] = DerivedLongGauge(
            _REQ_RETRY_NAME,
            'Statsbeat metric tracking request retry count',
            'count',
            _get_network_properties(),
        )
        self._network_metrics[_get_throttle_count_value] = DerivedLongGauge(
            _REQ_THROTTLE_NAME,
            'Statsbeat metric tracking request throttle count',
            'count',
            _get_network_properties(),
        )
        self._network_metrics[_get_exception_count_value] = DerivedLongGauge(
            _REQ_EXCEPTION_NAME,
            'Statsbeat metric tracking request exception count',
            'count',
            _get_network_properties(),
        )
        # feature/instrumentation metrics
        # metrics related to what features and instrumentations are enabled
        self._feature_metric = LongGauge(
            _FEATURE_METRIC_NAME,
            'Statsbeat metric related to features enabled',  # noqa: E501
            'count',
            _get_feature_properties(),
        )
        # Instrumentation metric uses same name/properties as feature
        self._instrumentation_metric = LongGauge(
            _FEATURE_METRIC_NAME,
            'Statsbeat metric related to instrumentations enabled',  # noqa: E501
            'count',
            _get_feature_properties(),
        )

    # Metrics that are sent on application start
    def get_initial_metrics(self):
        """Return attach, feature and instrumentation metrics.

        Each metric is included only if its gauge exists and produced
        a value; an empty list is possible.
        """
        stats_metrics = []
        if self._attach_metric:
            attach_metric = self._get_attach_metric()
            if attach_metric:
                stats_metrics.append(attach_metric)
        if self._feature_metric:
            feature_metric = self._get_feature_metric()
            if feature_metric:
                stats_metrics.append(feature_metric)
        if self._instrumentation_metric:
            instr_metric = self._get_instrumentation_metric()
            if instr_metric:
                stats_metrics.append(instr_metric)
        return stats_metrics

    # Metrics sent every statsbeat interval
    def get_metrics(self):
        """Return the metrics to export for this statsbeat interval.

        Network metrics are exported every interval; the "initial"
        metrics are folded in only every
        ``_STATS_LONG_INTERVAL_THRESHOLD`` intervals. Any exception is
        logged and swallowed so stats collection never breaks export.
        """
        metrics = []
        try:
            # Initial metrics use the long export interval
            # Only export once long count hits threshold
            with self._stats_lock:
                self._long_threshold_count = self._long_threshold_count + 1
                if self._long_threshold_count >= _STATS_LONG_INTERVAL_THRESHOLD:  # noqa: E501
                    metrics.extend(self.get_initial_metrics())
                    self._long_threshold_count = 0
            network_metrics = self._get_network_metrics()
            metrics.extend(network_metrics)
        except Exception as ex:
            _logger.warning('Error while exporting stats metrics %s.', ex)
        return metrics

    def _get_network_metrics(self):
        """Return non-zero network (Breeze request) metrics.

        Labels are the common properties plus endpoint type and host.
        """
        properties = self._get_common_properties()
        properties.append(LabelValue(_ENDPOINT_TYPES[0]))  # endpoint
        properties.append(LabelValue(self._options.endpoint))  # host
        metrics = []
        for fn, metric in self._network_metrics.items():
            # NOTE: A time series is a set of unique label values
            # If the label values ever change, a separate time series will be
            # created, however, `_get_properties()` should never change
            metric.create_time_series(properties, fn)
            stats_metric = metric.get_metric(datetime.datetime.utcnow())
            # Don't export if value is 0
            if stats_metric.time_series[0].points[0].value.value != 0:
                metrics.append(stats_metric)
        return metrics

    def _get_feature_metric(self):
        """Return the feature metric with the feature bitmask as a label."""
        properties = self._get_common_properties()
        # Inserts happen at index 4 so the second insert ends up before
        # the first: final order is [.., type, feature, ..]
        properties.insert(4, LabelValue(self._feature))  # feature long
        properties.insert(4, LabelValue(_FEATURE_TYPES.FEATURE))  # type
        self._feature_metric.get_or_create_time_series(properties)
        return self._feature_metric.get_metric(datetime.datetime.utcnow())

    def _get_instrumentation_metric(self):
        """Return the instrumentation metric (enabled integrations)."""
        properties = self._get_common_properties()
        properties.insert(4, LabelValue(get_integrations()))  # instr long
        properties.insert(4, LabelValue(
            _FEATURE_TYPES.INSTRUMENTATION))  # type # noqa: E501
        self._instrumentation_metric.get_or_create_time_series(properties)
        return self._instrumentation_metric.get_metric(
            datetime.datetime.utcnow())  # noqa: E501

    def _get_attach_metric(self):
        """Detect the resource provider and return the attach metric.

        Detection order: App Service web app, Function app, Azure VM
        (via the instance metadata service), otherwise "unknown".
        Side effects: updates ``self._rp`` and, on a VM, ``self._os_type``.
        """
        properties = []
        rp = ''
        rpId = ''
        # rp, rpId
        if os.environ.get("WEBSITE_SITE_NAME") is not None:
            # Web apps
            rp = _RP_NAMES[0]
            rpId = '{}/{}'.format(
                os.environ.get("WEBSITE_SITE_NAME"),
                os.environ.get("WEBSITE_HOME_STAMPNAME", ''))
        elif os.environ.get("FUNCTIONS_WORKER_RUNTIME") is not None:
            # Function apps
            rp = _RP_NAMES[1]
            rpId = os.environ.get("WEBSITE_HOSTNAME")
        elif self._vm_retry and self._get_azure_compute_metadata():
            # VM
            rp = _RP_NAMES[2]
            rpId = '{}/{}'.format(
                self._vm_data.get("vmId", ''),
                self._vm_data.get("subscriptionId", ''))
            self._os_type = self._vm_data.get("osType", '')
        else:
            # Not in any rp or VM metadata failed
            rp = _RP_NAMES[3]
            rpId = _RP_NAMES[3]
        self._rp = rp
        properties.extend(self._get_common_properties())
        properties.insert(1, LabelValue(rpId))  # rpid
        self._attach_metric.get_or_create_time_series(properties)
        return self._attach_metric.get_metric(datetime.datetime.utcnow())

    def _get_common_properties(self):
        """Return the label values shared by all statsbeat metrics.

        Order matters: callers insert extra labels at fixed positions.
        """
        properties = []
        properties.append(LabelValue(self._rp))  # rp
        properties.append(LabelValue("sdk"))  # attach type
        properties.append(LabelValue(self._instrumentation_key))  # cikey
        # runTimeVersion
        properties.append(LabelValue(platform.python_version()))
        properties.append(LabelValue(self._os_type or platform.system()))  # os
        properties.append(LabelValue("python"))  # language
        properties.append(LabelValue(ext_version))  # version
        return properties

    def _get_azure_compute_metadata(self):
        """Query the Azure instance metadata service (AIMS).

        Returns True and stores the parsed JSON in ``self._vm_data`` on
        success. ``self._vm_retry`` is set False only when the host is
        determined not to be a VM (connection error/timeout); transient
        failures keep it True so a later call retries.
        """
        try:
            request_url = "{0}?{1}&{2}".format(
                _AIMS_URI, _AIMS_API_VERSION, _AIMS_FORMAT)
            response = requests.get(
                request_url, headers={"MetaData": "True"}, timeout=5.0)
        except (requests.exceptions.ConnectionError, requests.Timeout):
            # Not in VM
            self._vm_retry = False
            return False
        except requests.exceptions.RequestException:
            self._vm_retry = True  # retry
            return False
        try:
            text = response.text
            self._vm_data = json.loads(text)
        except Exception:  # pylint: disable=broad-except
            # Error in reading response body, retry
            self._vm_retry = True
            return False
        # Vm data is perpetually updated
        self._vm_retry = True
        return True
class HeartbeatMetric:
    """Heartbeat metric whose labels describe the hosting environment.

    Labels cover SDK/extension versions, OS type and, depending on the
    environment, App Service, Function App or Azure VM identifiers.
    """

    NAME = "Heartbeat"

    def __init__(self):
        # Raw JSON from the Azure instance metadata service, if any
        self.vm_data = {}
        # True when the metadata request should be retried later
        self.vm_retry = False
        self.properties = OrderedDict()
        self._init_properties()
        self.heartbeat = LongGauge(
            HeartbeatMetric.NAME,
            'Heartbeat metric with custom dimensions',
            'count',
            list(self.properties.keys()),
        )
        self.heartbeat.get_or_create_time_series(
            list(self.properties.values())
        )

    def get_metrics(self):
        """Return the heartbeat metric as a single-element list.

        If a previous VM metadata request failed transiently, retry it
        and rebuild the gauge (its label keys change when VM data is
        added) before reading the metric.
        """
        if self.vm_retry:
            # Only need to possibly update if vm retry
            if self._get_azure_compute_metadata() and not self.vm_retry:
                self._populate_vm_data()
                # Recreate the metric to initialize key/values
                self.heartbeat = LongGauge(
                    HeartbeatMetric.NAME,
                    'Heartbeat metric with custom dimensions',
                    'count',
                    list(self.properties.keys()),
                )
                self.heartbeat.get_or_create_time_series(
                    list(self.properties.values())
                )
        return [self.heartbeat.get_metric(datetime.datetime.utcnow())]

    def _init_properties(self):
        """Populate ``self.properties`` from the hosting environment.

        Always records sdk/os labels; then adds App Service, Function
        App or VM labels depending on which environment is detected.
        """
        self.properties[LabelKey("sdk", '')] = LabelValue(
            'py{}:oc{}:ext{}'.format(
                platform.python_version(),
                opencensus_version,
                ext_version,
            )
        )
        self.properties[LabelKey("osType", '')] = LabelValue(platform.system())
        if os.environ.get("WEBSITE_SITE_NAME") is not None:
            # Web apps
            self.properties[LabelKey("appSrv_SiteName", '')] = \
                LabelValue(os.environ.get("WEBSITE_SITE_NAME"))
            self.properties[LabelKey("appSrv_wsStamp", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOME_STAMPNAME", ''))
            self.properties[LabelKey("appSrv_wsHost", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOSTNAME", ''))
        elif os.environ.get("FUNCTIONS_WORKER_RUNTIME") is not None:
            # Function apps
            self.properties[LabelKey("azfunction_appId", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOSTNAME"))
        elif self._get_azure_compute_metadata() and not self.vm_retry:
            # VM
            self._populate_vm_data()

    def _get_azure_compute_metadata(self):
        """Query the Azure instance metadata service (AIMS).

        On success stores the parsed JSON in ``self.vm_data``, clears
        ``self.vm_retry`` and returns True. Connection failure means
        "not a VM" (no retry); other request/parse errors set
        ``self.vm_retry`` so a later call retries.
        """
        try:
            request_url = "{0}?{1}&{2}".format(
                _AIMS_URI, _AIMS_API_VERSION, _AIMS_FORMAT)
            response = requests.get(
                request_url, headers={"MetaData": "True"}, timeout=5.0)
        except (requests.exceptions.ConnectionError, requests.Timeout):
            # Not in VM
            self.vm_retry = False
            return False
        except requests.exceptions.RequestException:
            self.vm_retry = True  # retry
            return False
        try:
            text = response.text
            self.vm_data = json.loads(text)
        except Exception:  # pylint: disable=broad-except
            # Error in reading response body, retry
            self.vm_retry = True
            return False
        self.vm_retry = False
        return True

    def _populate_vm_data(self):
        """Copy VM identifiers from ``self.vm_data`` into the properties."""
        if self.vm_data:
            self.properties[LabelKey("azInst_vmId", '')] = \
                LabelValue(self.vm_data.get("vmId", ''))
            self.properties[LabelKey("azInst_subscriptionId", '')] = \
                LabelValue(self.vm_data.get("subscriptionId", ''))
            self.properties[LabelKey("azInst_osType", '')] = \
                LabelValue(self.vm_data.get("osType", ''))
class HeartbeatMetric:
    """Heartbeat metric whose labels describe the hosting environment.

    Labels cover SDK/extension versions, OS type and, depending on the
    environment, App Service, Function App or Azure VM identifiers.
    """

    NAME = "Heartbeat"

    def __init__(self):
        # Raw JSON from the Azure instance metadata service, if any
        self.vm_data = {}
        # True when running on (or still probing for) an Azure VM
        self.is_vm = False
        self.properties = OrderedDict()
        self.update_properties()
        self.heartbeat = LongGauge(
            HeartbeatMetric.NAME,
            'Heartbeat metric with custom dimensions',
            'count',
            list(self.properties.keys()),
        )
        self.heartbeat.get_or_create_time_series(list(
            self.properties.values()))

    def get_metrics(self):
        """Return the heartbeat metric as a single-element list.

        On a VM the environment-derived properties may change, so they
        are refreshed and the gauge rebuilt (label keys are fixed at
        construction time) before the metric is read.
        """
        if self.is_vm:
            # Only need to update if in vm (properties could change)
            self.properties.clear()
            self.update_properties()
            self.heartbeat = LongGauge(
                HeartbeatMetric.NAME,
                'Heartbeat metric with custom dimensions',
                'count',
                list(self.properties.keys()),
            )
            self.heartbeat.get_or_create_time_series(
                list(self.properties.values()))
        return [self.heartbeat.get_metric(datetime.datetime.utcnow())]

    def update_properties(self):
        """Populate ``self.properties`` from the hosting environment.

        Always records sdk/os labels; then adds App Service, Function
        App or VM labels depending on which environment is detected.
        """
        self.properties[LabelKey("sdk", '')] = LabelValue('py{}:oc{}:ext{}'.format(
            platform.python_version(),
            opencensus_version,
            ext_version,
        ))
        self.properties[LabelKey("osType", '')] = LabelValue(platform.system())
        if os.environ.get("WEBSITE_SITE_NAME") is not None:
            # Web apps
            self.properties[LabelKey("appSrv_SiteName", '')] = \
                LabelValue(os.environ.get("WEBSITE_SITE_NAME"))
            self.properties[LabelKey("appSrv_wsStamp", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOME_STAMPNAME", ''))
            self.properties[LabelKey("appSrv_wsHost", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOSTNAME", ''))
        elif os.environ.get("FUNCTIONS_WORKER_RUNTIME") is not None:
            # Function apps
            self.properties[LabelKey("azfunction_appId", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOSTNAME"))
        elif self.get_azure_compute_metadata():
            # VM
            if self.vm_data:
                self.properties[LabelKey("azInst_vmId", '')] = \
                    LabelValue(self.vm_data.get("vmId", ''))
                self.properties[LabelKey("azInst_subscriptionId", '')] = \
                    LabelValue(self.vm_data.get("subscriptionId", ''))
                self.properties[LabelKey("azInst_osType", '')] = \
                    LabelValue(self.vm_data.get("osType", ''))

    def get_azure_compute_metadata(self):
        """Query the Azure instance metadata service (AIMS).

        Returns True (and sets ``self.is_vm``) when the host is, or may
        still be, an Azure VM; on success the parsed JSON is stored in
        ``self.vm_data``. Connection errors and timeouts mean "not a
        VM"; other request errors and body-parse errors return with
        ``is_vm`` True so the next heartbeat retries.
        """
        try:
            request_url = "{0}?{1}&{2}".format(
                _AIMS_URI, _AIMS_API_VERSION, _AIMS_FORMAT)
            logger.debug(f"get_azure_compute_metadata: {request_url}")
            response = requests.get(
                request_url, headers={"MetaData": "True"}, timeout=2.0)
        except (requests.exceptions.ConnectionError, requests.Timeout):
            # Timeout was previously handled by the RequestException
            # branch below; treat it as "not in VM" like the connection
            # error, consistent with the other metadata probes.
            logger.info(
                "Cannot connect to AIMS. Assuming execution outside of Azure VM."
            )
            # Not in VM
            self.is_vm = False
            return False
        except requests.exceptions.RequestException:
            logger.error("Request to AIMS returned an error. Retrying...",
                         exc_info=True)
            # FIX: previously fell through and read the unbound `response`,
            # raising a NameError that was only accidentally swallowed by
            # the broad except below (and logged as a parse failure).
            # Keep is_vm True so the next heartbeat retries, and bail out.
            self.is_vm = True
            return True
        self.is_vm = True
        try:
            text = response.text
            self.vm_data = json.loads(text)
        except Exception:  # pylint: disable=broad-except
            logger.error("Response from AIMS could not be parsed. Retrying...",
                         exc_info=True)
            # Error in reading response body, retry on the next heartbeat
            pass
        return True