예제 #1
0
class HeartbeatMetric:
    """Heartbeat gauge whose labels describe the hosting environment.

    The label set always contains the SDK version string and the OS type.
    Depending on which environment variables are set, it is extended with
    web app or function app details; otherwise the Azure compute metadata
    endpoint is queried and, on success, VM details are added.
    """

    NAME = "Heartbeat"

    def __init__(self):
        self.vm_data = {}
        self.vm_retry = False
        self.properties = OrderedDict()
        self._init_properties()
        self._build_heartbeat()

    def get_metrics(self):
        """Return a one-element list with the current heartbeat metric.

        While ``vm_retry`` is set, the compute metadata request is attempted
        again first; if it succeeds the VM labels are added and the gauge is
        rebuilt so that it carries the new label keys and values.
        """
        # Short-circuit order matters: the metadata request is only issued
        # while a retry is pending, and a successful request clears the flag.
        retry_succeeded = (
            self.vm_retry
            and self._get_azure_compute_metadata()
            and not self.vm_retry
        )
        if retry_succeeded:
            self._populate_vm_data()
            self._build_heartbeat()
        timestamp = datetime.datetime.utcnow()
        return [self.heartbeat.get_metric(timestamp)]

    def _build_heartbeat(self):
        """(Re)create the gauge and its time series from ``properties``."""
        label_keys = list(self.properties.keys())
        label_values = list(self.properties.values())
        self.heartbeat = LongGauge(
            HeartbeatMetric.NAME,
            'Heartbeat metric with custom dimensions',
            'count',
            label_keys,
        )
        self.heartbeat.get_or_create_time_series(label_values)

    def _init_properties(self):
        """Fill ``properties`` with the labels for the detected environment."""
        labels = self.properties
        sdk_version = 'py{}:oc{}:ext{}'.format(
            platform.python_version(),
            opencensus_version,
            ext_version,
        )
        labels[LabelKey("sdk", '')] = LabelValue(sdk_version)
        labels[LabelKey("osType", '')] = LabelValue(platform.system())

        site_name = os.environ.get("WEBSITE_SITE_NAME")
        if site_name is not None:
            # Web apps
            labels[LabelKey("appSrv_SiteName", '')] = LabelValue(site_name)
            labels[LabelKey("appSrv_wsStamp", '')] = LabelValue(
                os.environ.get("WEBSITE_HOME_STAMPNAME", ''))
            labels[LabelKey("appSrv_wsHost", '')] = LabelValue(
                os.environ.get("WEBSITE_HOSTNAME", ''))
        elif os.environ.get("FUNCTIONS_WORKER_RUNTIME") is not None:
            # Function apps
            labels[LabelKey("azfunction_appId", '')] = LabelValue(
                os.environ.get("WEBSITE_HOSTNAME"))
        elif self._get_azure_compute_metadata() and not self.vm_retry:
            # VM
            self._populate_vm_data()

    def _get_azure_compute_metadata(self):
        """Request compute metadata and store the decoded body in ``vm_data``.

        Returns True when the request succeeded and the body was decoded as
        JSON. On every path ``vm_retry`` is updated: it is cleared on success
        and on connection errors/timeouts (treated as "not in a VM"), and set
        on any other request error or when the body cannot be read/decoded.
        """
        request_url = "{0}?{1}&{2}".format(
            _AIMS_URI, _AIMS_API_VERSION, _AIMS_FORMAT)
        try:
            response = requests.get(
                request_url, headers={"MetaData": "True"}, timeout=5.0)
        except (requests.exceptions.ConnectionError, requests.Timeout):
            # Not in VM
            self.vm_retry = False
            return False
        except requests.exceptions.RequestException:
            # Some other request failure: try again on a later call
            self.vm_retry = True
            return False

        try:
            self.vm_data = json.loads(response.text)
        except Exception:  # pylint: disable=broad-except
            # Error in reading response body, retry
            self.vm_retry = True
            return False

        self.vm_retry = False
        return True

    def _populate_vm_data(self):
        """Copy the VM fields from ``vm_data`` into ``properties``.

        Does nothing when ``vm_data`` is empty; missing fields become ''.
        """
        if not self.vm_data:
            return
        vm_fields = (
            ("azInst_vmId", "vmId"),
            ("azInst_subscriptionId", "subscriptionId"),
            ("azInst_osType", "osType"),
        )
        for label_name, field in vm_fields:
            self.properties[LabelKey(label_name, '')] = \
                LabelValue(self.vm_data.get(field, ''))
예제 #2
0
class _StatsbeatMetrics:
    """Builds the statsbeat metrics (attach, feature, instrumentation and
    network gauges) for one set of exporter options.

    ``get_initial_metrics`` produces the attach/feature/instrumentation
    metrics; ``get_metrics`` produces the network metrics on every call and
    adds the initial ones each time the long-interval counter reaches
    ``_STATS_LONG_INTERVAL_THRESHOLD``.
    """

    def __init__(self, options):
        self._options = options
        self._instrumentation_key = options.instrumentation_key

        # Feature flags are accumulated from the options
        enabled = _StatsbeatFeature.NONE
        if options.enable_local_storage:
            enabled |= _StatsbeatFeature.DISK_RETRY
        if options.credential:
            enabled |= _StatsbeatFeature.AAD
        self._feature = enabled

        self._stats_lock = threading.Lock()
        self._vm_data = {}
        self._vm_retry = True
        self._rp = _RP_NAMES[3]
        self._os_type = platform.system()

        # Attach metrics - metrics related to rp (resource provider)
        self._attach_metric = LongGauge(
            _ATTACH_METRIC_NAME,
            'Statsbeat metric related to rp integrations',
            'count',
            _get_attach_properties(),
        )

        # Keep track of how many iterations until long export
        self._long_threshold_count = 0

        # Network metrics - metrics related to request calls to Breeze.
        # Each row is (value callback, gauge class, metric name, description);
        # the callback is what populates the gauge value and is also the key
        # under which the gauge is stored.
        network_gauges = (
            (
                _get_success_count_value,
                DerivedLongGauge,
                _REQ_SUC_COUNT_NAME,
                'Statsbeat metric tracking request success count',
            ),
            (
                _get_failure_count_value,
                DerivedLongGauge,
                _REQ_FAIL_COUNT_NAME,
                'Statsbeat metric tracking request failure count',
            ),
            (
                _get_average_duration_value,
                DerivedDoubleGauge,
                _REQ_DURATION_NAME,
                'Statsbeat metric tracking average request duration',
            ),
            (
                _get_retry_count_value,
                DerivedLongGauge,
                _REQ_RETRY_NAME,
                'Statsbeat metric tracking request retry count',
            ),
            (
                _get_throttle_count_value,
                DerivedLongGauge,
                _REQ_THROTTLE_NAME,
                'Statsbeat metric tracking request throttle count',
            ),
            (
                _get_exception_count_value,
                DerivedLongGauge,
                _REQ_EXCEPTION_NAME,
                'Statsbeat metric tracking request exception count',
            ),
        )
        self._network_metrics = {}
        for value_fn, gauge_cls, metric_name, description in network_gauges:
            self._network_metrics[value_fn] = gauge_cls(
                metric_name,
                description,
                'count',
                _get_network_properties(),
            )

        # Feature/instrumentation metrics - what features and
        # instrumentations are enabled
        self._feature_metric = LongGauge(
            _FEATURE_METRIC_NAME,
            'Statsbeat metric related to features enabled',
            'count',
            _get_feature_properties(),
        )
        # Instrumentation metric uses same name/properties as feature
        self._instrumentation_metric = LongGauge(
            _FEATURE_METRIC_NAME,
            'Statsbeat metric related to instrumentations enabled',
            'count',
            _get_feature_properties(),
        )

    # Metrics that are sent on application start
    def get_initial_metrics(self):
        """Return the attach, feature and instrumentation metrics, in that
        order, skipping any gauge or resulting metric that is falsy."""
        sources = (
            (self._attach_metric, self._get_attach_metric),
            (self._feature_metric, self._get_feature_metric),
            (self._instrumentation_metric, self._get_instrumentation_metric),
        )
        collected = []
        for gauge, build_metric in sources:
            if not gauge:
                continue
            metric = build_metric()
            if metric:
                collected.append(metric)
        return collected

    # Metrics sent every statsbeat interval
    def get_metrics(self):
        """Return the metrics for this interval.

        Any exception raised while collecting is logged as a warning and
        whatever was gathered up to that point is returned.
        """
        exported = []
        try:
            # Initial metrics use the long export interval
            # Only export once long count hits threshold
            with self._stats_lock:
                ticks = self._long_threshold_count + 1
                self._long_threshold_count = ticks
                if ticks >= _STATS_LONG_INTERVAL_THRESHOLD:
                    exported.extend(self.get_initial_metrics())
                    self._long_threshold_count = 0
            exported.extend(self._get_network_metrics())
        except Exception as ex:
            _logger.warning('Error while exporting stats metrics %s.', ex)

        return exported

    def _get_network_metrics(self):
        """Return the network metrics whose first point is non-zero."""
        labels = self._get_common_properties()
        labels.extend([
            LabelValue(_ENDPOINT_TYPES[0]),  # endpoint
            LabelValue(self._options.endpoint),  # host
        ])
        exported = []
        for value_fn, gauge in self._network_metrics.items():
            # NOTE: A time series is a set of unique label values. If the
            # label values ever change, a separate time series will be
            # created; the labels built above are expected to stay the same.
            gauge.create_time_series(labels, value_fn)
            snapshot = gauge.get_metric(datetime.datetime.utcnow())
            first_point = snapshot.time_series[0].points[0]
            # Don't export if value is 0
            if first_point.value.value != 0:
                exported.append(snapshot)
        return exported

    def _get_feature_metric(self):
        """Return the feature metric labelled with the enabled features."""
        labels = self._get_common_properties()
        feature_value = LabelValue(self._feature)  # feature long
        type_value = LabelValue(_FEATURE_TYPES.FEATURE)  # type
        # Type goes at index 4, immediately followed by the feature value
        labels[4:4] = [type_value, feature_value]
        self._feature_metric.get_or_create_time_series(labels)
        return self._feature_metric.get_metric(datetime.datetime.utcnow())

    def _get_instrumentation_metric(self):
        """Return the instrumentation metric labelled with the value of
        ``get_integrations()``."""
        labels = self._get_common_properties()
        integrations_value = LabelValue(get_integrations())  # instr long
        type_value = LabelValue(_FEATURE_TYPES.INSTRUMENTATION)  # type
        # Type goes at index 4, immediately followed by the integrations value
        labels[4:4] = [type_value, integrations_value]
        gauge = self._instrumentation_metric
        gauge.get_or_create_time_series(labels)
        return gauge.get_metric(datetime.datetime.utcnow())

    def _get_attach_metric(self):
        """Detect the resource provider, record it in ``_rp`` and return the
        attach metric labelled with the rp and its id."""
        site_name = os.environ.get("WEBSITE_SITE_NAME")
        if site_name is not None:
            # Web apps
            rp = _RP_NAMES[0]
            rp_id = '{}/{}'.format(
                site_name, os.environ.get("WEBSITE_HOME_STAMPNAME", ''))
        elif os.environ.get("FUNCTIONS_WORKER_RUNTIME") is not None:
            # Function apps
            rp = _RP_NAMES[1]
            rp_id = os.environ.get("WEBSITE_HOSTNAME")
        elif self._vm_retry and self._get_azure_compute_metadata():
            # VM
            rp = _RP_NAMES[2]
            rp_id = '{}/{}'.format(
                self._vm_data.get("vmId", ''),
                self._vm_data.get("subscriptionId", ''))
            self._os_type = self._vm_data.get("osType", '')
        else:
            # Not in any rp or VM metadata failed
            rp = rp_id = _RP_NAMES[3]

        # Must be stored before the common labels are built: they read _rp
        self._rp = rp
        labels = self._get_common_properties()
        labels.insert(1, LabelValue(rp_id))  # rpid
        self._attach_metric.get_or_create_time_series(labels)
        return self._attach_metric.get_metric(datetime.datetime.utcnow())

    def _get_common_properties(self):
        """Return a new list with the label values shared by all metrics."""
        return [
            LabelValue(self._rp),  # rp
            LabelValue("sdk"),  # attach type
            LabelValue(self._instrumentation_key),  # cikey
            LabelValue(platform.python_version()),  # runTimeVersion
            LabelValue(self._os_type or platform.system()),  # os
            LabelValue("python"),  # language
            LabelValue(ext_version),  # version
        ]

    def _get_azure_compute_metadata(self):
        """Request compute metadata and store the decoded body in
        ``_vm_data``.

        Returns True when the request succeeded and the body was decoded as
        JSON. ``_vm_retry`` is cleared only on connection errors/timeouts
        (treated as "not in a VM"); on every other path, including success,
        it is set so the request is made again later.
        """
        request_url = "{0}?{1}&{2}".format(
            _AIMS_URI, _AIMS_API_VERSION, _AIMS_FORMAT)
        try:
            response = requests.get(
                request_url, headers={"MetaData": "True"}, timeout=5.0)
        except (requests.exceptions.ConnectionError, requests.Timeout):
            # Not in VM
            self._vm_retry = False
            return False
        except requests.exceptions.RequestException:
            # Some other request failure: retry
            self._vm_retry = True
            return False

        try:
            self._vm_data = json.loads(response.text)
        except Exception:  # pylint: disable=broad-except
            # Error in reading response body, retry
            self._vm_retry = True
            return False

        # Vm data is perpetually updated
        self._vm_retry = True
        return True
예제 #3
0
class HeartbeatMetric:
    """Heartbeat gauge whose labels describe the hosting environment.

    The label set always contains the SDK version string and the OS type.
    Depending on which environment variables are set, it is extended with
    web app or function app details; otherwise the Azure compute metadata
    endpoint is queried and any VM details it returned are added.
    """

    NAME = "Heartbeat"

    def __init__(self):
        self.vm_data = {}
        self.is_vm = False
        self.properties = OrderedDict()
        self.update_properties()
        self._reset_heartbeat()

    def get_metrics(self):
        """Return a one-element list with the current heartbeat metric.

        When ``is_vm`` is set, the labels are rebuilt from scratch (which
        queries the metadata endpoint again) and the gauge is recreated so it
        carries the refreshed label keys and values.
        """
        if self.is_vm:
            # Only need to update if in vm (properties could change)
            self.properties.clear()
            self.update_properties()
            self._reset_heartbeat()
        return [self.heartbeat.get_metric(datetime.datetime.utcnow())]

    def _reset_heartbeat(self):
        """(Re)create the gauge and its time series from ``properties``."""
        self.heartbeat = LongGauge(
            HeartbeatMetric.NAME,
            'Heartbeat metric with custom dimensions',
            'count',
            list(self.properties.keys()),
        )
        self.heartbeat.get_or_create_time_series(
            list(self.properties.values()))

    def update_properties(self):
        """Fill ``properties`` with the labels for the detected environment.

        Existing entries are overwritten, not removed; callers that need a
        clean label set clear ``properties`` first.
        """
        self.properties[LabelKey("sdk", '')] = LabelValue(
            'py{}:oc{}:ext{}'.format(
                platform.python_version(),
                opencensus_version,
                ext_version,
            )
        )
        self.properties[LabelKey("osType", '')] = LabelValue(platform.system())
        if os.environ.get("WEBSITE_SITE_NAME") is not None:
            # Web apps
            self.properties[LabelKey("appSrv_SiteName", '')] = \
                LabelValue(os.environ.get("WEBSITE_SITE_NAME"))
            self.properties[LabelKey("appSrv_wsStamp", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOME_STAMPNAME", ''))
            self.properties[LabelKey("appSrv_wsHost", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOSTNAME", ''))
        elif os.environ.get("FUNCTIONS_WORKER_RUNTIME") is not None:
            # Function apps
            self.properties[LabelKey("azfunction_appId", '')] = \
                LabelValue(os.environ.get("WEBSITE_HOSTNAME"))
        elif self.get_azure_compute_metadata():
            # VM; vm_data may still be empty if the request or parsing failed
            if self.vm_data:
                self.properties[LabelKey("azInst_vmId", '')] = \
                    LabelValue(self.vm_data.get("vmId", ''))
                self.properties[LabelKey("azInst_subscriptionId", '')] = \
                    LabelValue(self.vm_data.get("subscriptionId", ''))
                self.properties[LabelKey("azInst_osType", '')] = \
                    LabelValue(self.vm_data.get("osType", ''))

    def get_azure_compute_metadata(self):
        """Request compute metadata and store the decoded body in ``vm_data``.

        Returns:
            False when the endpoint cannot be connected to (``is_vm`` is
            cleared); True otherwise (``is_vm`` is set). ``vm_data`` is only
            replaced when a response was received and decoded as JSON, so on
            a failed request or an unparsable body it keeps its prior value.
        """
        request_url = "{0}?{1}&{2}".format(_AIMS_URI, _AIMS_API_VERSION,
                                           _AIMS_FORMAT)
        logger.debug("get_azure_compute_metadata: %s", request_url)
        try:
            response = requests.get(request_url,
                                    headers={"MetaData": "True"},
                                    timeout=2.0)
        except requests.exceptions.ConnectionError:
            logger.info(
                "Cannot connect to AIMS. Assuming execution outside of Azure VM."
            )
            # Not in VM
            self.is_vm = False
            return False
        except requests.exceptions.RequestException:
            logger.error("Request to AIMS returned an error. Retrying...",
                         exc_info=True)
            # There is no response object to read on this path, so return
            # here instead of falling through to the parsing step. Keeping
            # is_vm set makes the next get_metrics() call retry the request.
            self.is_vm = True
            return True

        self.is_vm = True
        try:
            self.vm_data = json.loads(response.text)
        except Exception:  # pylint: disable=broad-except
            # Error in reading response body; retried on the next update
            logger.error("Response from AIMS could not be parsed. Retrying...",
                         exc_info=True)

        return True