def parse_error_trace(traced_data):
    """Fold the error records in *traced_data* into the ``error_count`` table.

    NOTE(review): this function reads ``error_count``, ``error_types`` and
    ``external_error`` as free variables (it appears to be the nested helper
    from ``error_packets`` lifted out of its closure) -- confirm those names
    are in scope wherever this definition lives.

    :param traced_data: mapping of ``"<action>_|..."`` filter keys to error
        records carrying at least ``count`` and ``tracker_type``.
    """
    for error_filter_key, error in six.iteritems(traced_data):
        error_count["Errors/Count/All"] += error["count"]

        if error["tracker_type"] not in error_types:
            # non-web trackers only bump the background total (by one
            # occurrence, not by the record's count -- as in the original)
            error_count["Errors/Count/AllBackground"] += 1
            continue

        error_count["Errors/Count/AllWeb"] += error["count"]
        filter_keys = error_filter_key.split("_|")

        # per-action count, e.g. "Errors/Count/<action>"
        action_key = "Errors/Count/%s" % filter_keys[0]
        error_count[action_key] = error_count.get(action_key, 0) + error["count"]

        # per-error-type count, e.g. "Errors/Type:<type>/<action>"
        error_type_key = "Errors/Type:%s/%s" % (filter_keys[2], filter_keys[0])
        error_count[error_type_key] = error_count.get(error_type_key, 0) + error["count"]

        if error["tracker_type"] == external_error:
            # external errors are additionally grouped by status code
            status_key = "Errors/Type:%s/%s" % (error["status_code"], filter_keys[0])
            error_count[status_key] = error_count.get(status_key, 0) + error["count"]
def component_metrics(self, metric_name_ids):
    """Return component metric packets ready for upload.

    Keys of ``self.__time_packets`` are ``(metric_name, parent)`` tuples; a
    metric name containing ``"|"`` carries callee information encoded as
    ``"<name>|<calleeId>|<calleeName>"``.

    :param metric_name_ids: server-provided mapping from ``"name:parent"``
        strings to numeric metric ids; when a mapping exists the id replaces
        the verbose key to shrink the payload.
    :return: list of ``[upload_key, packet]`` pairs.

    Side effect: resets ``self.__time_packets`` once the data is handed out.
    """
    result = []
    for key, value in six.iteritems(self.__time_packets):
        extend_metrics = key[0].split("|")
        if len(extend_metrics) == 1:
            upload_key = {"name": key[0], "parent": key[1]}
            upload_key_str = '%s:%s' % (key[0], key[1])
            # prefer the server-assigned numeric id when one exists
            upload_key = metric_name_ids.get(upload_key_str, upload_key)
            result.append([upload_key, value])
        elif len(extend_metrics) == 3:
            # callee-qualified metrics are never replaced with an id
            upload_key = {
                "name": extend_metrics[0],
                "parent": key[1],
                "calleeId": extend_metrics[1],
                "calleeName": extend_metrics[2]
            }
            result.append([upload_key, value])

    self.__time_packets = {}
    return result
def _do_harvest(self, last_harvest, current_harvest, shutdown=False):
    """Run one harvest cycle over every registered application.

    :param last_harvest: timestamp of the previous harvest.
    :param current_harvest: timestamp of the current harvest.
    :param shutdown: sign the agent status, shutdown or not
    :return: None
    """
    self._last_harvest = time.time()

    # Return codes that require the session to be torn down and rebuilt:
    # config changed, data token invalid, or license key error reported by
    # the data collector.
    reset_codes = (CONSTANCE_OUT_DATE_CONFIG, CONSTANCE_INVALID_DATA_TOKEN,
                   CONSTANCE_INVALID_LICENSE_KEY)

    for name, application in six.iteritems(self._applications):
        # if application is not register to server. test it in application.
        try:
            console.debug("Harvest data for application %s", name)
            ret = application.harvest(last_harvest, current_harvest, shutdown)
            if ret and ret[0] in reset_codes:
                console.info(
                    "Error occurred from server, dispatcher will stop session threading and restart it.%s",
                    ret)
                application.stop_connecting()
                application.activate_session()
        except Exception as exc:
            console.exception(
                "Errors occurred when harvest application %s, %s", name, exc)

    console.info("Spend %.2fs to harvest all applications.",
                 time.time() - self._last_harvest)
def error_packets(self, metric_name_ids):
    """stat the error trace metric for performance

    Aggregates counts from both traced action errors and traced external
    errors into ``"Errors/..."`` metrics and pairs each with its upload key.

    :param metric_name_ids: server mapping from metric-name string to a
        numeric id; the id replaces the verbose key when known.
    :return: list of ``[upload_key, [count]]`` pairs.
    """
    external_error, web_action_error = 'External', 'WebAction'
    error_types = [external_error, web_action_error]
    error_count = {
        "Errors/Count/All": 0,
        "Errors/Count/AllWeb": 0,
        "Errors/Count/AllBackground": 0
    }

    def parse_error_trace(traced_data):
        # Fold one traced-error dict into the shared error_count table.
        for error_filter_key, error in six.iteritems(traced_data):
            error_count["Errors/Count/All"] += error["count"]

            if error["tracker_type"] not in error_types:
                # non-web trackers only bump the background total (by one
                # occurrence, not by count -- as in the original)
                error_count["Errors/Count/AllBackground"] += 1
                continue

            error_count["Errors/Count/AllWeb"] += error["count"]
            filter_keys = error_filter_key.split("_|")

            # per-action count, e.g. "Errors/Count/<action>"
            action_key = "Errors/Count/%s" % filter_keys[0]
            error_count[action_key] = error_count.get(action_key, 0) + error["count"]

            # per-error-type count, e.g. "Errors/Type:<type>/<action>"
            error_type_key = "Errors/Type:%s/%s" % (filter_keys[2], filter_keys[0])
            error_count[error_type_key] = error_count.get(error_type_key, 0) + error["count"]

            if error["tracker_type"] == external_error:
                # external errors are additionally grouped by status code
                status_key = "Errors/Type:%s/%s" % (error["status_code"], filter_keys[0])
                error_count[status_key] = error_count.get(status_key, 0) + error["count"]

    parse_error_trace(self.__traced_errors)
    parse_error_trace(self.__traced_external_errors)

    stat_value = []
    for key, value in six.iteritems(error_count):
        # prefer the server-assigned numeric id when one exists
        upload_key = metric_name_ids.get(key, {"name": key})
        stat_value.append([upload_key, [value]])

    return stat_value
def exception_packets(self, metric_name_ids):
    """Aggregate traced exceptions into ``"Exception/..."`` count metrics.

    :param metric_name_ids: server mapping from metric-name string to a
        numeric id; the id replaces the verbose key when known.
    :return: list of ``[upload_key, [count]]`` pairs.
    """
    exception_count = {
        "Exception/Count/All": 0,
        "Exception/Count/AllWeb": 0,
        "Exception/Count/AllBackground": 0
    }

    for ex_filter_key, ex in six.iteritems(self.__traced_exception):
        exception_count["Exception/Count/All"] += ex["count"]

        if ex["tracker_type"] != 'WebAction':
            # background exceptions only bump the background total (by one
            # occurrence, not by count -- as in the original)
            exception_count["Exception/Count/AllBackground"] += 1
            continue

        filter_keys = ex_filter_key.split("_|")
        exception_count["Exception/Count/AllWeb"] += ex["count"]

        # per-action count, e.g. "Exception/Count/<action>"
        action_key = "Exception/Count/%s" % filter_keys[0]
        exception_count[action_key] = exception_count.get(action_key, 0) + ex["count"]

        # per-exception-type count, e.g. "Exception/Type:<type>/<action>"
        exception_type_key = "Exception/Type:%s/%s" % (filter_keys[1], filter_keys[0])
        exception_count[exception_type_key] = \
            exception_count.get(exception_type_key, 0) + ex["count"]

    stat_value = []
    for key, value in six.iteritems(exception_count):
        # prefer the server-assigned numeric id when one exists
        upload_key = metric_name_ids.get(key, {"name": key})
        stat_value.append([upload_key, [value]])

    return stat_value
def general_trace_metric(self, metric_name_ids):
    """Return general metric packets for upload and reset the store.

    :param metric_name_ids: server mapping from metric-name string to a
        numeric id; the id replaces the verbose key when known.
    :return: list of ``[upload_key, packet]`` pairs.

    Side effect: resets ``self.__general_packets`` once the data is handed out.
    """
    result = []
    for key, value in six.iteritems(self.__general_packets):
        upload_key_str = '%s' % key[0]
        # prefer the server-assigned numeric id when one exists
        upload_key = metric_name_ids.get(upload_key_str, {"name": key[0]})
        result.append([upload_key, value])

    self.__general_packets = {}
    return result
def apdex_data(self, metric_name_ids):
    """Return apdex metric packets for upload and reset the store.

    :param metric_name_ids: server mapping from metric-name string to a
        numeric id; the id replaces the verbose key when known.
    :return: list of ``[upload_key, packet]`` pairs.
    """
    result = []
    for key, value in six.iteritems(self.__apdex_packets):
        upload_key_str = '%s' % key[0]
        # prefer the server-assigned numeric id when one exists
        upload_key = metric_name_ids.get(upload_key_str, {"name": key[0]})
        result.append([upload_key, value])

    # reset the data if returned for upload.
    self.__apdex_packets = {}
    return result
def component_metrics(self, metric_name_ids):
    """Return component metric packets ready for upload.

    Keys of ``self.__time_packets`` are ``(metric_name, parent)`` tuples.

    :param metric_name_ids: server mapping from ``"name:parent"`` strings to
        numeric metric ids; the id replaces the verbose key when known.
    :return: list of ``[upload_key, packet]`` pairs.

    Side effect: resets ``self.__time_packets`` once the data is handed out.
    """
    result = []
    for key, value in six.iteritems(self.__time_packets):
        upload_key = {"name": key[0], "parent": key[1]}
        upload_key_str = '%s:%s' % (key[0], key[1])
        # prefer the server-assigned numeric id when one exists
        upload_key = metric_name_ids.get(upload_key_str, upload_key)
        result.append([upload_key, value])

    self.__time_packets = {}
    return result
def rollback(self, stat, merge_performance=True):
    """rollback the performance data when upload the data failed. except the
    traced error count.

    :param stat: the statistics snapshot whose captured data is merged back.
    :param merge_performance: when False, the captured data is simply dropped.
    :return: None
    """
    if not merge_performance:
        return

    console.warning(
        "Agent will rollback the data which is captured at last time. That indicates your network is"
        " broken.")

    def _merge_packets(source, target):
        # Metric-packet dicts: copy brand-new keys, otherwise fold the
        # incoming packet into the existing one via merge_packets().
        for key, value in six.iteritems(source):
            packets = target.get(key)
            if not packets:
                target[key] = copy.copy(value)
            else:
                packets.merge_packets(value)

    def _merge_counts(source, target):
        # Traced-error dicts: copy brand-new keys, otherwise only the
        # occurrence count is accumulated.
        for key, value in six.iteritems(source):
            packets = target.get(key)
            if not packets:
                target[key] = copy.copy(value)
            else:
                packets["count"] += value["count"]

    _merge_packets(stat.__time_packets, self.__time_packets)
    _merge_packets(stat.__apdex_packets, self.__apdex_packets)
    _merge_packets(stat.__action_packets, self.__action_packets)
    _merge_packets(stat.__general_packets, self.__general_packets)

    _merge_counts(stat.__traced_errors, self.__traced_errors)
    _merge_counts(stat.__traced_external_errors, self.__traced_external_errors)
def action_metrics(self, metric_name_ids):
    """Return action metric packets for upload and reset the store.

    :param metric_name_ids: server mapping from metric-name string to a
        numeric id; the id replaces the verbose key when known.
    :return: list of ``[upload_key, packet]`` pairs, extended with the
        quantile markers for each action when quantiles are configured.
    """
    result = []
    # hoisted: the quantile switch does not change while iterating
    use_quantile = 0 != len(self.__settings.quantile)

    for key, value in six.iteritems(self.__action_packets):
        upload_key_str = '%s' % key[0]
        # prefer the server-assigned numeric id when one exists
        upload_key = metric_name_ids.get(upload_key_str, {"name": key[0]})

        if use_quantile:
            # NOTE(review): assumes every action key has a quantile entry;
            # a missing key would raise KeyError -- confirm upstream always
            # populates self.__quantile alongside self.__action_packets.
            result.append([upload_key, value,
                           self.__quantile[upload_key_str].markers])
        else:
            result.append([upload_key, value])

    # reset the data if returned for upload.
    self.__action_packets = {}
    return result
def general_trace_metric(self, metric_name_ids):
    """Return general metric packets for upload and reset the store.

    A metric name containing ``"|"`` carries callee information encoded as
    ``"<name>|<calleeId>|<calleeName>"``; such metrics keep their verbose
    key and are never replaced with a numeric id.

    :param metric_name_ids: server mapping from metric-name string to a
        numeric id.
    :return: list of ``[upload_key, packet]`` pairs.
    """
    result = []
    for key, value in six.iteritems(self.__general_packets):
        extend_keys = key[0].split("|")
        if len(extend_keys) == 1:
            upload_key_str = '%s' % key[0]
            # prefer the server-assigned numeric id when one exists
            upload_key = metric_name_ids.get(upload_key_str, {"name": key[0]})
            result.append([upload_key, value])
        elif len(extend_keys) == 3:
            # do not replace the metric with id.
            upload_key = {"name": extend_keys[0],
                          "calleeId": extend_keys[1],
                          "calleeName": extend_keys[2]}
            result.append([upload_key, value])

    self.__general_packets = {}
    return result
def close_dispatcher(self, timeout=None):
    """shutdown the controller through the event signal

    :param timeout: seconds to wait for the harvest thread to join; defaults
        to ``self._config.shutdown_timeout``.
    :return: None
    """
    if timeout is None:
        timeout = self._config.shutdown_timeout

    # NOTE(review): this returns early when the shutdown event is NOT set,
    # then sets the already-set event below. The condition looks inverted
    # (a guard against double shutdown would be
    # `if self._harvest_shutdown.isSet(): return`) -- confirm against the
    # callers before changing.
    if not self._harvest_shutdown.isSet():
        return

    # stop the connecting thread, if has.
    for name, application in six.iteritems(self._applications):
        console.info(
            "Interpreter shutdown, terminal app connect threading now.")
        application.stop_connecting()

    self._harvest_shutdown.set()
    self._harvest_thread.join(timeout)
    console.info('Tingyun agent is Shutdown...')
def merge_metric_packets(self, snapshot):
    """Merge a snapshot's metric data into this statistics container.

    Fix: in the slow-action loop the original executed ``break`` right after
    inserting a brand-new action key, which aborted the merge of every
    remaining slow action (the sibling implementation inserts or extends and
    keeps iterating); both stray ``break`` statements are now ``continue``.

    :param snapshot: a statistics object of the same class whose packets
        are folded into ``self``.
    :return: None
    """
    def _merge(source, target):
        # copy brand-new keys, otherwise fold into the existing packet
        for key, value in six.iteritems(source):
            packets = target.get(key)
            if not packets:
                target[key] = copy.copy(value)
            else:
                packets.merge_packets(value)

    _merge(snapshot.__time_packets, self.__time_packets)
    _merge(snapshot.__apdex_packets, self.__apdex_packets)
    _merge(snapshot.__action_packets, self.__action_packets)

    # TODO: think more about the background task
    for key, value in six.iteritems(snapshot.__traced_errors):
        packets = self.__traced_errors.get(key)
        if not packets:
            self.__traced_errors[key] = copy.copy(value)
        else:
            # NOTE(review): only the item counter is bumped here; the
            # sibling implementation also updates packets["count"] and
            # indexes item[-4] -- confirm which layout this version's
            # error records use.
            packets["item"][-3] += value["count"]

    for key, value in six.iteritems(snapshot.__traced_external_errors):
        packets = self.__traced_external_errors.get(key)
        if not packets:
            self.__traced_external_errors[key] = copy.copy(value)
        else:
            packets["item"][-3] += value["count"]

    # generate general data
    _merge(snapshot.__general_packets, self.__general_packets)

    # for action trace
    top_n = self.__settings.action_tracer.top_n
    for key, value in six.iteritems(snapshot.__slow_action):
        # although the target action trace value is `list`, but it only has
        # 1 element in one metric.
        if key not in self.__slow_action:
            self.__slow_action[key] = value
            continue  # was `break`: it aborted merging of remaining actions

        slow_actions = self.__slow_action[key]
        if len(slow_actions) > top_n:
            console.debug("The action trace is reach the top(%s), action(%s) is ignored.",
                          top_n, key)
            continue  # was `break`: the cap is per action, keep merging others

        slow_actions.extend(value)

    # for slow sql
    max_sql = self.__settings.slow_sql_count
    for key, value in six.iteritems(snapshot.__slow_sql_packets):
        if len(self.__slow_sql_packets) > max_sql:
            console.debug("The slow sql trace count is reach the top.")
            continue

        slow_sql = self.__slow_sql_packets.get(key)
        if not slow_sql:
            self.__slow_sql_packets[key] = value
        else:
            slow_sql.merge_packets(value)

    # for quantile
    if 0 != len(self.__settings.quantile):
        action, duration = snapshot.__quantile_data
        if action not in self.__quantile:
            self.__quantile[action] = QuantileP2(self.__settings.quantile)

        self.__quantile[action].add(duration)
def merge_metric_packets(self, snapshot):
    """Merge a snapshot's metric data into this statistics container.

    Judging by their probability of occurrence, slow traces and errors are
    low-probability events, so they are not filtered when the data is first
    recorded; instead the size limits are enforced here at merge time.

    :param snapshot: a statistics object of the same class whose packets
        are folded into ``self``.
    :return: None
    """
    # time / apdex / action packets: copy brand-new keys, otherwise fold
    # into the existing packet via merge_packets().
    for key, value in six.iteritems(snapshot.__time_packets):
        packets = self.__time_packets.get(key)
        if not packets:
            self.__time_packets[key] = copy.copy(value)
        else:
            packets.merge_packets(value)

    for key, value in six.iteritems(snapshot.__apdex_packets):
        packets = self.__apdex_packets.get(key)
        if not packets:
            self.__apdex_packets[key] = copy.copy(value)
        else:
            packets.merge_packets(value)

    for key, value in six.iteritems(snapshot.__action_packets):
        packets = self.__action_packets.get(key)
        if not packets:
            self.__action_packets[key] = copy.copy(value)
        else:
            packets.merge_packets(value)

    # TODO: think more about the background task
    for key, value in six.iteritems(snapshot.__traced_errors):
        # NOTE(review): `self.__traced_errors.get(key, [])` is a single
        # packet dict, so len() counts its fields rather than the number of
        # traced errors; presumably `len(self.__traced_errors)` was
        # intended -- confirm before relying on this limit. Same pattern
        # repeats for exceptions and external errors below.
        if len(self.__traced_errors.get(
                key, [])) > self.__settings.max_error_trace:
            console.debug("Error trace is reached maximum limitation. %s",
                          self.__settings.max_error_trace)
            continue

        packets = self.__traced_errors.get(key)
        if not packets:
            self.__traced_errors[key] = copy.copy(value)
        else:
            # accumulate both the trace item counter and the total count
            packets["item"][-4] += value["count"]
            packets["count"] += value["count"]

    for key, value in six.iteritems(snapshot.__traced_exception):
        if len(self.__traced_exception.get(
                key, [])) > self.__settings.exception.max_type_count:
            console.debug(
                "Exception trace is reached maximum limitation. %s",
                self.__settings.exception.max_type_count)
            continue

        packets = self.__traced_exception.get(key)
        if not packets:
            self.__traced_exception[key] = copy.copy(value)
        else:
            packets["item"][-4] += value["count"]
            packets["count"] += value["count"]

    for key, value in six.iteritems(snapshot.__traced_external_errors):
        if len(self.__traced_external_errors.get(
                key, [])) > self.__settings.max_error_trace:
            console.debug(
                "External error trace is reached maximum limitation. %s",
                self.__settings.max_error_trace)
            continue

        packets = self.__traced_external_errors.get(key)
        if not packets:
            self.__traced_external_errors[key] = copy.copy(value)
        else:
            # external errors use item[-3] (vs. -4 above) -- layouts differ
            packets["item"][-3] += value["count"]
            packets["count"] += value["count"]

    # generate general data
    for key, value in six.iteritems(snapshot.__general_packets):
        packets = self.__general_packets.get(key)
        if not packets:
            self.__general_packets[key] = copy.copy(value)
        else:
            packets.merge_packets(value)

    # for action trace
    top_n = self.__settings.action_tracer.max_action_trace_per_action
    for key, value in six.iteritems(snapshot.__slow_action):
        # although the target action trace value is `list`, but it only has 1 element in one metric.
        # NOTE(review): `break` stops merging ALL remaining actions as soon
        # as one action reaches the cap; a strictly per-action cap would use
        # `continue` -- confirm intent.
        if len(self.__slow_action.get(key, [])) > top_n:
            console.debug(
                "The action trace is reach the top(%s), action(%s) is ignored.",
                top_n, key)
            break

        if key not in self.__slow_action:
            self.__slow_action[key] = value
        else:
            self.__slow_action[key].extend(value)

    # for slow sql
    max_sql = self.__settings.slow_sql_count
    for key, value in six.iteritems(snapshot.__slow_sql_packets):
        if len(self.__slow_sql_packets) > max_sql:
            console.debug("Slow sql count is more than max count %s ",
                          max_sql)
            continue

        slow_sql = self.__slow_sql_packets.get(key)
        if not slow_sql:
            self.__slow_sql_packets[key] = value
        else:
            slow_sql.merge_packets(value)

    # for quantile
    if 0 != len(self.__settings.quantile):
        # a snapshot carries exactly one (action, duration) quantile sample
        action, duration = snapshot.__quantile_data
        if action not in self.__quantile:
            self.__quantile[action] = QuantileP2(self.__settings.quantile)

        self.__quantile[action].add(duration)