def get_log_detect_result_test():
    """Smoke-run ``get_log_detect_result`` against a minimal hand-built fixture.

    The fixture holds a single network element ("gjjcore2") with one time
    bucket and one empty log list stored under the name "test". Nothing is
    asserted; the call is only exercised and its output goes to ``answer``
    and ``consumer_logger``.
    """
    ne_ad = "gjjcore2"
    ts_key = find_timestamp_key(1614290340)
    # Nested layout: network element -> time bucket -> log name -> entries.
    ne2ts2logs = {ne_ad: {ts_key: {"test": []}}}
    answer = []
    get_log_detect_result(ne_ad, ts_key, ne2ts2logs, answer, consumer_logger)
def save_trace_a(self, trace_data):
    """
    Store one trace record of system "a" in its time bucket.

    Traces of system "a" always consist of a single record, so only the
    network element and its duration are kept; they are used later to decide
    whether the element is anomalous.

    :param trace_data: a single trace record; the keys "timestamp",
        "cmdb_id" and "duration" are read
    :return: None
    """
    window_ts = find_timestamp_key(trace_data["timestamp"])
    # Create the bucket on first use, then append (element, duration).
    bucket = self.timestamp2traces.setdefault(window_ts, [])
    record = (trace_data["cmdb_id"], trace_data["duration"])
    bucket.append(record)
def get_metric_detect_result_test():
    """Smoke-run ``get_metric_detect_result`` against a minimal fixture.

    Builds a ``MetricSimpleDetection("a")`` model and a metrics mapping that
    holds one network element ("gjjcore2"), one time bucket and one empty
    value list stored under the KPI name "test". Nothing is asserted; the
    call is only exercised.
    """
    ne_ad = "gjjcore2"
    ts_key = find_timestamp_key(1614290340)
    metric_model = MetricSimpleDetection("a")
    # Nested layout: network element -> time bucket -> KPI name -> values.
    ne2ts2metrics = {ne_ad: {ts_key: {"test": []}}}
    answer = []
    get_metric_detect_result(
        ne_ad, ts_key, metric_model, ne2ts2metrics, answer, consumer_logger
    )
def save_metric(self, metric_data):
    """Store one metric sample and evict buckets that fell out of retention.

    Samples are kept in ``self.ne2ts2metrics`` with the layout
    ``network element -> time bucket -> KPI name -> list of values``.

    :param metric_data: a single metric record; the keys "cmdb_id",
        "timestamp", "kpi_name" and "value" are read
    :return: None
    """
    ne = metric_data["cmdb_id"]
    # NOTE(review): find_timestamp_key is defined elsewhere; presumably it
    # maps a raw timestamp onto the start of its 300-second bucket - confirm.
    ts = find_timestamp_key(metric_data["timestamp"])
    # Retention cutoff: three 300-second steps before the current bucket.
    old_ts = ts - 300 * 3
    kpi_name = metric_data["kpi_name"]
    value = metric_data["value"]

    buckets = self.ne2ts2metrics.setdefault(ne, {})
    samples = buckets.setdefault(ts, {}).setdefault(kpi_name, [])

    # Eviction. Drop every bucket at or before the cutoff, not only the one
    # keyed exactly ``old_ts``: if this element skipped a bucket, the exact
    # key never matches and the older buckets would otherwise stay in memory
    # forever. The key list is materialised first because the dict is
    # modified while it is being walked.
    for stale_ts in [key for key in buckets if key <= old_ts]:
        del buckets[stale_ts]

    samples.append(value)
def save_trace_b(self, trace_data):
    """Append one span of a system "b" trace to its time bucket.

    Spans are stored in ``self.timestamp2traces`` with the layout
    ``time bucket -> trace id -> list of span dicts``.

    :param trace_data: a single span record; the keys "timestamp",
        "trace_id", "cmdb_id", "parent_id", "span_id" and "duration" are read
    :return: None
    """
    window_ts = find_timestamp_key(trace_data["timestamp"])
    traces_in_window = self.timestamp2traces.setdefault(window_ts, {})
    spans = traces_in_window.setdefault(trace_data["trace_id"], [])
    # TODO: check whether a dict per span is really needed.
    span = {
        field: trace_data[field]
        for field in ("cmdb_id", "parent_id", "span_id", "duration")
    }
    spans.append(span)
def save_logs_b(self, log_data):
    """Store one "gc" log line and evict buckets that fell out of retention.

    Only records whose "log_name" is "gc" are kept; everything else is
    ignored. Kept lines are stored in ``self.ne2ts2logs`` with the layout
    ``network element -> time bucket -> log name -> list of CSV strings``,
    each string being ``log_id,bucket,cmdb_id,log_name,value``.

    :param log_data: a single log record; the keys "log_name", "cmdb_id",
        "timestamp", "log_id" and "value" are read
    :return: None
    """
    log_name = log_data["log_name"]
    if log_name != "gc":
        return

    ne = log_data["cmdb_id"]
    # NOTE(review): find_timestamp_key is defined elsewhere; presumably it
    # maps a raw timestamp onto the start of its 300-second bucket - confirm.
    ts = find_timestamp_key(log_data["timestamp"])
    # Retention cutoff: one 300-second step before the current bucket.
    old_ts = ts - 300
    logs = ",".join(
        (log_data["log_id"], str(ts), ne, log_name, log_data["value"])
    )

    buckets = self.ne2ts2logs.setdefault(ne, {})
    lines = buckets.setdefault(ts, {}).setdefault(log_name, [])

    # Eviction. Drop every bucket at or before the cutoff, not only the one
    # keyed exactly ``old_ts``: when no log arrived in the bucket right after
    # an older one, the exact key never matches and the older bucket would
    # otherwise stay in memory forever. The key list is materialised first
    # because the dict is modified while it is being walked.
    for stale_ts in [key for key in buckets if key <= old_ts]:
        del buckets[stale_ts]

    lines.append(logs)
def detect(self, ad_timestamp):
    """Return up to four element names ranked by their ``self.ad_ele`` value.

    Every record stored for the previous and the current time bucket is fed
    to ``self.duration_detect``. ``self.ad_ele`` is then sorted in descending
    order by (value, key), emptied, and the keys of the first four entries
    are returned.

    :param ad_timestamp: timestamp of the reported anomaly
    :return: list with at most four keys of ``self.ad_ele``
    """
    curr_ts = find_timestamp_key(ad_timestamp)
    # Only the previous and the current bucket are inspected; the following
    # bucket (curr_ts + 300) is deliberately left out for now.
    for window_ts in (curr_ts - 300, curr_ts):
        if window_ts not in self.timestamp2traces:
            continue
        for record in self.timestamp2traces[window_ts]:
            self.duration_detect(record[0], record[1])

    # NOTE(review): self.ad_ele is presumably filled by duration_detect -
    # confirm. It is reset here so scores do not carry over between calls.
    ranked = sorted(
        self.ad_ele.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    self.ad_ele.clear()
    return [item[0] for item in ranked[:4]]
def start_detect(q, ts2traces, ne2ts2metrics, ne2ts2logs):
    """Consume anomaly timestamps from ``q`` forever and submit findings.

    For every timestamp taken from the queue the trace model is asked for
    suspicious elements. The first one (and, when more than two candidates
    were returned, the second one) is then checked with the metric and log
    detectors, which collect their findings in ``answer``. A non-empty
    ``answer`` is passed to ``submit``. Any exception raised while handling
    a timestamp is logged and the loop moves on to the next item.

    :param q: queue-like object; ``q.get()`` must yield a timestamp in
        seconds since the epoch (it is passed to ``time.localtime``)
    :param ts2traces: trace storage handed to ``TraceSimpleDetectionB``
    :param ne2ts2metrics: metric storage handed to ``get_metric_detect_result``
    :param ne2ts2logs: log storage handed to ``get_log_detect_result``
    :return: never returns
    """
    cur_path = os.path.abspath(os.path.dirname(__file__))
    trace_baseline_path = cur_path + "/trace_detection_algorithm/system-b_trace_0311.csv_22266996_trace_baseline.txt"
    trace_model = TraceSimpleDetectionB(trace_baseline_path, ts2traces)
    # TODO: used for now; a better-suited model may replace it later.
    metric_model = MetricSimpleDetection("b")
    consumer_logger.info('start get q')
    while True:
        try:
            ad_ts = q.get()
            consumer_logger.info("ad ts: " + str(ad_ts))
            strtime = time.strftime("%Y--%m--%d %H:%M:%S", time.localtime(ad_ts))
            consumer_logger.info(strtime)

            trace_result = trace_model.detect_b(ad_ts)
            if not trace_result:
                consumer_logger.info("in " + str(ad_ts) + " no trace error")
                continue

            ne_ad = trace_result[0]
            consumer_logger.info("trace ad: " + str(ne_ad))
            consumer_logger.info("start metric ad")
            ts_key = find_timestamp_key(ad_ts)
            answer = []
            get_metric_detect_result(ne_ad, ts_key, metric_model, ne2ts2metrics, answer, consumer_logger)
            get_log_detect_result(ne_ad, ts_key, ne2ts2logs, answer, consumer_logger)

            # NOTE(review): the second candidate is only examined when at
            # least three were returned (> 2); index 1 already exists with
            # two. Confirm whether this threshold is intentional.
            if len(trace_result) > 2:
                ne_ad2 = trace_result[1]
                # Log the second candidate itself; the first one was logged
                # here before.
                consumer_logger.info("trace ad2: " + str(ne_ad2))
                consumer_logger.info("start metric ad")
                get_metric_detect_result(ne_ad2, ts_key, metric_model, ne2ts2metrics, answer, consumer_logger)
                get_log_detect_result(ne_ad2, ts_key, ne2ts2logs, answer, consumer_logger)

            if not answer:
                consumer_logger.info("in " + str(ad_ts) + " and trace_ad: " + str(ne_ad) + " no metric/log error")
            else:
                submit_response = submit(answer)
                # str() keeps this line from raising TypeError when submit()
                # returns something other than a string, which would also
                # skip the "answer" log line below.
                consumer_logger.info("submit_response: " + str(submit_response))
                consumer_logger.info("answer: " + str(answer))
        except Exception as e:
            # Top-level boundary of the worker loop: log and keep consuming.
            consumer_logger.error("exception happened: " + str(e) + " " + str(e.args))
def detect_b(self, ad_timestamp):
    """Return up to four element names ranked by their ``self.ad_ele`` score.

    For every trace stored in the time bucket of ``ad_timestamp`` a tree is
    built through ``self.create_tree``; afterwards ``self.duration_detect_b``
    is run. ``self.ad_ele`` is then sorted in descending order by
    (first item of the value, key), emptied, and the keys of the first four
    entries are returned.

    :param ad_timestamp: timestamp of the reported anomaly
    :return: list with at most four keys of ``self.ad_ele``
    """
    curr_ts = find_timestamp_key(ad_timestamp)
    if curr_ts in self.timestamp2traces:
        traces_by_id = self.timestamp2traces[curr_ts]
        # Walk a snapshot of the trace ids rather than the live mapping.
        for trace_id in list(traces_by_id.keys()):
            spans = traces_by_id[trace_id]
            # span_id -> position of that span inside ``spans``.
            span_index = {
                span["span_id"]: position
                for position, span in enumerate(spans)
            }
            self.roots[trace_id] = {}
            self.create_tree(trace_id, spans, span_index)
        # NOTE(review): placed once per bucket, after all trees are built -
        # confirm this matches the intended nesting level.
        self.duration_detect_b()

    # TODO: rank not only by how anomalous an element is but also by how
    # many anomalies it has.
    ranked = sorted(
        self.ad_ele.items(),
        key=lambda item: (item[1][0], item[0]),
        reverse=True,
    )
    self.ad_ele.clear()
    return [item[0] for item in ranked[:4]]