def getPath(day=None):
    """Return (p1, p2, p3): the (trace metrics, platform metrics, data description) paths."""
    prefix_path = data_path.get_data_path() if not day else data_path.get_data_path(day)
    p1 = os.path.join(prefix_path, "调用链指标")
    p2 = os.path.join(prefix_path, "平台指标")
    p3 = os.path.join(prefix_path, "数据说明")
    return p1, p2, p3
def getKpis(files=None, path=None, kpis=None):
    """Read the KPI curve data for all metrics into memory.

    Args:
        files (list, optional): file names to read, e.g. ["dcos_docker.csv"].
            Defaults to None, meaning every file under `path`.
        path (str, optional): directory containing `files`.

    Returns:
        dict: {"cmdb_id,name,bomc_id,itemid": [row1, row2, ...]} where each row is
            ['itemid', 'name', 'bomc_id', 'timestamp', 'value', 'cmdb_id'].
    """
    if kpis is None:
        kpis = {}
    if not path:
        path = os.path.join(data_path.get_data_path(), "平台指标")
    files = os.listdir(path) if not files else files
    for f in files:
        p = os.path.join(path, f)
        if not os.path.isfile(p):
            continue
        data = readCSV(p)
        # Skip the header; columns: ['itemid', 'name', 'bomc_id', 'timestamp', 'value', 'cmdb_id']
        for row in data[1:]:
            key = "%s,%s,%s,%s" % (row[5], row[1], row[2], row[0])
            if kpis.get(key) is None:
                kpis[key] = []
            kpis[key].append(row)
    return kpis
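# A minimal usage sketch for getKpis (assumes the "平台指标" layout described
# above; "dcos_docker.csv" is just an example file name, and _demo_getKpis is a
# hypothetical helper, not part of the module):
def _demo_getKpis():
    kpis = getKpis(files=["dcos_docker.csv"])
    # Each key is "cmdb_id,name,bomc_id,itemid"; each value is a list of rows
    for key, rows in list(kpis.items())[:3]:
        cmdb_id, name, bomc_id, itemid = key.split(",")
        print(cmdb_id, name, "rows:", len(rows))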
def main():
    path = os.path.join(data_path.get_data_path(), "平台指标")
    print(path)
    print("Reading file info:")
    kpis = getKpis(["dcos_docker.csv"])
    filter_list = ["cpu"]
    print("Plotting:")
    showKpiCurve(kpis, filter_list)
def getPath(day=None):
    """Return (p1, p2, p3): the (trace metrics, platform metrics, data description) paths."""
    prefix_path = data_path.get_data_path() if not day else data_path.get_data_path(day)
    p1 = os.path.join(prefix_path, "调用链指标")
    p2 = os.path.join(prefix_path, "平台指标")
    p3 = data_path.data_instruction_path
    return p1, p2, p3
def start_build_trace(days=["2020_04_20"]):
    indicators = fill_all_indicators(data_path.data_instruction_path)
    time0 = time.time()
    # Split each kind of platform metric into its own CSV file
    for day in days:
        plat_path = os.path.join(data_path.get_data_path(day), "平台指标")
        if len(os.listdir(plat_path)) <= 5:
            divide_file(plat_path)
    #! Save the merged traces
    saveJson(build_trace(days), data_path.data_save_path(),
             "trace_data_" + days[0] + ".json")
    print("Done! Took " + str(time.time() - time0) + " seconds")
def build_trace(days, res=None):
    """Merge all trace files for the given days into one trace dict.

    Args:
        days (list[str]): days whose trace files ("调用链指标") should be merged.
        res (dict, optional): dict to merge into; a new one is created if omitted.

    Returns:
        dict: traces keyed by traceId, each mapping spanId -> span.
    """
    if res is None:
        res = {}
    print("Starting trace merge!")
    merge_time = []
    for day in days:
        for traceName in traceNames:
            p = os.path.join(data_path.get_data_path(day), "调用链指标", traceName + ".csv")
            time1 = time.time()
            print("Reading file " + traceName)
            temp = readCSV(p)
            print("Finished reading " + traceName + ", building traces")
            for i in tqdm(range(len(temp) - 1), desc=traceName, ncols=100,
                          ascii=' =', bar_format='{l_bar}{bar}|'):
                # Columns: 0 callType, 1 startTime, 2 elapsedTime, 3 success,
                #          4 traceId, 5 id, 6 pid, 7 cmdb_id, 8 serviceName
                row = temp[i + 1]
                if len(row) <= 3:
                    continue
                # Build a span from the row
                span = generate_span(row)
                # Attach the span to its trace
                traceId, span_id = row[4], row[5]
                if res.get(traceId) is None:
                    res[traceId] = {}
                res[traceId][span_id] = span
            time_spend = time.time() - time1
            merge_time.append(time_spend)
            print("File " + traceName + "_" + day + ".csv merged, took " + str(time_spend) + "s")
    print("Trace merge finished! Total " + str(sum(merge_time)) + "s, per file:", merge_time)
    return res
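# A minimal sketch of consuming build_trace's result (the span layout is
# whatever generate_span produces from the columns listed above; this demo
# helper is hypothetical, not part of the module):
def _demo_walk_traces(days=["2020_04_20"]):
    traces = build_trace(days)
    for trace_id, spans in list(traces.items())[:1]:
        # spans maps spanId -> span; parent links come from the pid column
        print(trace_id, "span count:", len(spans))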
def getKPIs(timeStamp, cmd_id, bias=0, day="2020_04_22"):
    if not cmd_id:
        return {}
    key = cmd_id.split('_')[0]
    # If the cached values are close enough in time, return them directly
    if kpis.get(cmd_id) is not None and abs(kpis[cmd_id]["timestamp"] - timeStamp) <= bias:
        return kpis[cmd_id]["values"]
    valueJson = {}
    # Walk every indicator name and fetch the corresponding values
    for bomc_id, (sample_period, indicator_name) in indicators[key].items():
        # Path to the per-indicator CSV
        plat_path = os.path.join(data_path.get_data_path(day), "平台指标")
        file_path = os.path.join(plat_path, key, indicator_name + ".csv")
        # sample_period is in seconds; convert to milliseconds
        valueJson.update(
            get_kpis_for_an_indicator(timeStamp, cmd_id, bomc_id,
                                      sample_period * 1000, file_path))
    kpis[cmd_id] = {"timestamp": timeStamp, "values": valueJson}
    return valueJson
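# A minimal usage sketch (hypothetical timestamp and cmdb_id; assumes the
# module-level `kpis` cache and `indicators` table are initialised above;
# _demo_getKPIs is a hypothetical helper, not part of the module):
def _demo_getKPIs():
    ts = 1587528000000  # a ms-epoch timestamp within 2020_04_22
    values = getKPIs(ts, "docker_003", bias=60 * 1000, day="2020_04_22")
    print(len(values), "indicator values near", ts)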
def get_abnormal_interval(days, useGiven=True):
    business_paths = [
        os.path.join(data_path.get_data_path(day), "业务指标", "esb.csv")
        for day in days
    ]
    # Load the business-metric data, drop the header, as np.array
    data = None
    for p in business_paths:
        data = pd.concat([data, pd.read_csv(p)], ignore_index=True)
    data = data.values
    # Sort by timestamp
    data = data[np.argsort(data[:, 1])]
    #! Abnormal time intervals
    interval_times, fault_ids = [], []
    if useGiven:
        # Use the intervals listed in the provided fault document
        interval_times, fault_ids = anomaly_detection.fault_time(
            bias=0 * 60 * 100, file_day=days[0], type=2)
    else:
        # Detect the intervals with our own algorithm
        abnormal_data = anomaly_detection.find_abnormal_data(data)
        # Abnormal timestamps
        exception_times = abnormal_data[:, 1].astype(np.int64)
        #! Merge into abnormal time intervals
        interval_times = anomaly_detection.to_interval(exception_times)
        fault_ids = range(len(interval_times))
    print(str(interval_times))
    #! Whether each interval is a network fault
    is_net_error = []  # anomaly_detection.is_net_error_func(interval_times, abnormal_data)
    for i, j in zip(interval_times, is_net_error):
        print(i, j)
    # Plot the detected abnormal intervals
    anomaly_detection.draw_abnormal_period(data, interval_times)
    return interval_times, is_net_error, fault_ids
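# Illustrative sketch of the interval-merging idea behind
# anomaly_detection.to_interval (a hypothetical re-implementation, not the
# project's actual code): abnormal timestamps closer than `gap` milliseconds
# are merged into one (start, end) interval.
def _to_interval_sketch(timestamps, gap=5 * 60 * 1000):
    intervals = []
    for t in sorted(timestamps):
        if intervals and t - intervals[-1][1] <= gap:
            intervals[-1][1] = t  # extend the open interval
        else:
            intervals.append([t, t])  # start a new interval
    return [tuple(i) for i in intervals]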
# %%
import pandas as pd
import numpy as np
import os
from data_path import get_data_path
from sklearn.ensemble import IsolationForest
import show_Kpis
import anomaly_detection

# %% Read all KPIs
day = '2020_04_22'
path = os.path.join(get_data_path(day), "平台指标")
kpis = show_Kpis.getKpis(["dcos_docker.csv"], path)
print("Get ALL KPIS")


# %% step1: fetch one specific series
def get_specific_kpi(kpis, filter_list):
    """Select the series whose key contains every token in filter_list.

    Args:
        kpis (dict): KPI dict keyed by "cmdb_id,name,bomc_id,itemid".
        filter_list (list): substrings that must all appear in the key.

    Returns:
        list: [(key, np.array), ...] for every matching series.
    """
    res = []
    for k, v in kpis.items():
        # filter() yields the tokens missing from k; empty means all matched
        if not list(filter(lambda x: x not in k, filter_list)):
            res.append((k, np.array(v)))
    return res


filter_list = ["docker_004", "cpu"]
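# %% A minimal usage sketch of get_specific_kpi: a series is kept only when
# every token in filter_list is a substring of its key, so the two tokens
# above select the cpu-related series of cmdb_id docker_004.
specific = get_specific_kpi(kpis, filter_list)
print("matched series:", len(specific))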
from tqdm import tqdm
import os
import csv
import re
from read_data import readCSV
import data_path

path = os.path.join(data_path.get_data_path(), "调用链指标")


def order(path):
    files = os.listdir(path)
    for file in files:
        temp = readCSV(os.path.join(path, file))
        row_first = temp[0]
        # Sort the data rows by start time (column 1)
        temp = sorted(temp[1:], key=lambda x: x[1])
        new_name = file.split(".")[0] + "_sorted"
        with open(os.path.join(path, new_name + ".csv"), 'w', newline="") as fd:
            writer = csv.writer(fd)
            writer.writerow(row_first)
            for row in tqdm(temp, desc=file, ncols=100, ascii=' =',
                            bar_format='{l_bar}{bar}|'):
                writer.writerow(row)
        print("File " + file + " sorted")


#! Split each kind of platform metric into its own CSV
import numpy as np
import os
import matplotlib.pyplot as plt
import data_path
from read_data import readCSV

path = os.path.join(data_path.get_data_path(), "调用链指标", "Order_by_cmdid")


def draw(value, title):
    plt.plot(value, color='r')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    fileName = "docker_003.csv"
    filepath = os.path.join(path, fileName)
    res = np.array(readCSV(filepath))
    print(res.shape)
    # Column 2 is elapsedTime; np.float was removed from NumPy, use float
    value = res[:, 2].astype(float)
    draw(value, title=fileName.split(".")[0] + "_" + "csf_001")
    res = list(reader)
    return res


def readCsvWithPandas(p):
    """Read a CSV file via pandas."""
    # Return an empty list if the file does not exist
    if not os.path.exists(p):
        return []
    # Read the file and return its rows as an array
    return pd.read_csv(p, engine="python").values


def read_json(p):
    if not os.path.exists(p):
        return []
    with open(p, 'r') as f:
        json_data = json.load(f)
    return json_data


if __name__ == '__main__':
    day = "2020_05_22"
    path = os.path.join(data_path.get_data_path(day), "数据说明", "0故障说明.xlsx")
    table = read_xlrd(path)
    print(table)
# %%
days = ['2020_05_22']
# ,'2020_05_23','2020_05_24','2020_05_25','2020_05_26'
# ,'2020_05_27','2020_05_28','2020_05_29','2020_05_30','2020_05_31'

# %%
# trace metrics, platform metrics, data description
# importlib.reload(anomaly_detection)
plat_paths = [
    os.path.join(data_path.get_data_path(day), "平台指标") for day in days
]
interval_times, is_net_error, fault_ids = get_abnormal_interval(days)
# print(fault_ids)

# %% todo step2: load all traces
traces = {}
for day in days:
    prefix_path = data_path.get_data_path(day)
    trace_p = os.path.join(prefix_path, "调用链指标")
    data_cleaning.build_trace(trace_p, traces)

# %%
# %%
# pred = iforest(data, ["avg_time", "succee_rate"])
# timestamps = data[pred == -1]["startTime"].values
# interval_times = to_period_time(timestamps)
# print(len(interval_times))
# for t in interval_times:
#     print(t)
# print(fault_time())

# %%
if __name__ == "__main__":
    day = "2020_05_22"
    # Business metrics
    business_path = os.path.join(data_path.get_data_path(day), "业务指标", "esb.csv")
    # Load the business-metric data (header dropped) as np.array
    data = readCsvWithPandas(business_path)
    # Sort by timestamp
    data = data[np.argsort(data[:, 1])]
    # todo step1: abnormal timestamps
    abnormal_data = find_abnormal_data(data)
    # Abnormal timestamps
    exception_times = abnormal_data[:, 1].astype(np.int64)
    # Abnormal time intervals
    interval_times = to_interval(exception_times)
    is_net_error = is_net_error_func(interval_times, abnormal_data)
    print(len(interval_times))