def getKpis(files=None, path=None, kpis=None):
    """Load KPI curve data into memory.

    Args:
        files: optional list of file names, e.g. ["dcos_docker.csv"];
            defaults to every entry under *path*.
        path: directory holding the files; defaults to
            <data_path>/平台指标.
        kpis: optional dict to accumulate results into; a fresh dict is
            created per call when omitted.

    Returns:
        dict mapping "cmdb_id,name,bomc_id,itemid" -> list of rows, where
        each row is [itemid, name, bomc_id, timestamp, value, cmdb_id].
    """
    # FIX: original signature used `kpis={}` — a mutable default shared
    # across calls, so successive calls silently accumulated into one dict.
    if kpis is None:
        kpis = {}
    if not path:
        path = os.path.join(data_path.get_data_path(), "平台指标")
    files = os.listdir(path) if not files else files
    for f in files:
        p = os.path.join(path, f)
        if not os.path.isfile(p):  # skip sub-directories
            continue
        data = readCSV(p)
        # skip the header row; columns are
        # [itemid, name, bomc_id, timestamp, value, cmdb_id]
        for row in data[1:]:
            key = "%s,%s,%s,%s" % (row[5], row[1], row[2], row[0])
            kpis.setdefault(key, []).append(row)
    return kpis
def build_trace(path, res=None):
    """Merge the per-type trace CSV files under *path* into one trace dict.

    Args:
        path: directory holding the trace CSV files.
        res: optional dict to accumulate into; created fresh when omitted.

    Returns:
        dict {traceId: {"startTime": str, "spans": {spanId: span}}}.
    """
    # FIX: original signature used `res={}` — a mutable default shared
    # across calls; kept keyword-compatible but now created per call.
    if res is None:
        res = {}
    print("开始trace数据合并!")
    merge_time = []
    for day in datestamps:
        # NOTE(review): `day` only affects the log message below — the same
        # file path is read on every day iteration; confirm this is intended.
        for traceName in traceNames:
            p = os.path.join(path, traceName + ".csv")
            time1 = time.time()
            print("正在读取文件 " + traceName)
            temp = readCSV(p)
            print("读取" + traceName + "完毕,开始生成trace")
            for i in tqdm(range(len(temp) - 1), desc=traceName, ncols=100,
                          ascii=' =', bar_format='{l_bar}{bar}|'):
                # columns: 0 callType, 1 startTime, 2 elapsedTime, 3 success,
                #          4 traceId, 5 id, 6 pid, 7 cmdb_id, 8 serviceName
                row = temp[i + 1]
                if len(row) <= 3:  # skip malformed/short rows
                    continue
                # build a span from the row and file it under its trace
                span = generate_span(row)
                traceId, span_id = row[4], row[5]
                if res.get(traceId) is None:
                    res[traceId] = {
                        "startTime": span["timestamp"],
                        "spans": {}
                    }
                res[traceId]["spans"][span_id] = span
            time_spend = time.time() - time1
            merge_time.append(time_spend)
            print("文件" + traceName + "_" + day + ".csv " + "合并完毕,共花费 " +
                  str(time_spend) + "S")
    print("Trace 合并完毕!共花费 " + str(sum(merge_time)) + "S,分别是", merge_time)
    return res
def order(path):
    """Sort every CSV under *path* by its second column and write the
    result to `<name>_sorted.csv` inside the same directory.

    Args:
        path: directory containing the CSV files to sort.
    """
    for file in os.listdir(path):
        temp = readCSV(os.path.join(path, file))
        header = temp[0]
        rows = sorted(temp[1:], key=lambda x: x[1])
        new_name = file.split(".")[0] + "_sorted"
        # FIX: output path was built with `path + new_name` (no separator),
        # which wrote "<path><name>_sorted.csv" outside the target directory.
        out_path = os.path.join(path, new_name + ".csv")
        with open(out_path, 'w', newline="") as fd:
            writer = csv.writer(fd)
            writer.writerow(header)
            for row in tqdm(rows, desc=file, ncols=100, ascii=' =',
                            bar_format='{l_bar}{bar}|'):
                writer.writerow(row)
        print("文件" + file + "排序完成")
def divide_file(path, save_path=None):
    """Split each metric CSV under *path* into one file per indicator name.

    Args:
        path: directory containing the source CSV files.
        save_path: output root directory; defaults to *path*.

    Returns:
        str: the output root directory (save_path).
    """
    files = os.listdir(path)
    save_path = path if not save_path else save_path
    for file_name in files:
        # only process csv files; skip sub-directories and other entries
        if "csv" not in file_name:
            continue
        rows = readCSV(os.path.join(path, file_name))[1:]  # drop header
        # derive the sub-directory prefix (db / docker / os / redis):
        # dcos_* files take the second token, redis files are forced to "redis"
        new_dir = file_name.split(
            '_')[0] if 'dcos' not in file_name else re.split(
            '[_.]', file_name)[1]
        new_dir = 'redis' if 'redis' in file_name else new_dir
        new_path = os.path.join(save_path, new_dir)
        # FIX: exists()+mkdir() was race-prone; makedirs(exist_ok=True)
        # is atomic with respect to the existence check.
        os.makedirs(new_path, exist_ok=True)
        # group the rows by indicator name (column 1)
        # FIX: the per-indicator list was previously bound to `data`,
        # shadowing the outer file data; renamed for clarity.
        data_of_indicators = {}
        for row in rows:
            data_of_indicators.setdefault(row[1], []).append(row)
        print(file_name)
        for indicator_name, indicator_rows in tqdm(
                data_of_indicators.items(), desc="写入文件中", ncols=100,
                ascii=' #', bar_format='{l_bar}{bar}|'):
            p = os.path.join(new_path, indicator_name + ".csv")
            with open(p, 'w', newline="") as fd:
                csv.writer(fd).writerows(indicator_rows)
    return save_path
def get_kpis_for_an_indicator(timeStamp, cmd_id, bomc_id, sample_period, file_path):
    """Return, for one indicator file, the KPI value whose timestamp is
    closest to *timeStamp* within +/- *sample_period*.

    Args:
        timeStamp: target epoch timestamp (int).
        cmd_id: host identifier, e.g. "docker_007" (matched against column 5).
        bomc_id: indicator id to match (column 2).
        sample_period: half-width of the time window searched around timeStamp.
        file_path: CSV file for the indicator; rows are cached in the
            module-level `file_now` dict after the first read.

    Returns:
        dict {"(itemid,name,bomc_id)": value}; empty dict when the file
        is missing or empty.
    """
    # load and time-sort the file once, then serve from the cache
    if file_now.get(file_path) is None:
        csv_file = readCSV(file_path)
        if not csv_file:
            return {}
        rows = sorted(csv_file[1:], key=lambda x: x[3])
        file_now[file_path] = rows
    else:
        rows = file_now[file_path]
    valueJson = {}
    timeJson = {}
    low_index = binarySearch(rows, timeStamp - sample_period, 0, len(rows) - 1)
    high_index = binarySearch(rows, timeStamp + sample_period, 0, len(rows) - 1)
    # row columns: itemid, name, bomc_id, timestamp, value, cmdb_id
    for i in range(low_index, high_index):
        row = rows[i]
        # FIX: this local was previously named `time`, shadowing the stdlib
        # time module within the function body.
        time_diff = abs(int(row[3]) - timeStamp)
        if row[5] == cmd_id and bomc_id == row[2]:
            new_key = '(%s,%s,%s)' % (row[0], row[1], row[2])
            if valueJson.get(new_key) is not None:
                if time_diff < timeJson[new_key]:
                    valueJson[new_key] = row[4]
                    timeJson[new_key] = time_diff
                else:
                    # rows are time-sorted, so the distance to timeStamp
                    # shrinks then grows; the first increase means we have
                    # already passed the closest sample
                    break
            else:
                valueJson[new_key] = row[4]
                timeJson[new_key] = time_diff
    return valueJson
from read_data import readCSV
import pprint

# Load the csv data set; per the loop below, readCSV here returns a
# dict of {column_name: values}.
data = readCSV('mtcars.csv')

# Expose each column as a module-level variable.
# FIX: the original used exec(k + '=' + str(v)), which breaks whenever a
# value's str() is not a valid Python literal (e.g. strings with quotes)
# and is a code-injection risk if column names are untrusted. Assigning
# into globals() achieves the same binding without round-tripping through
# source code.
for k, v in data.items():
    globals()[k] = v

# pretty print to make the dictionary look nicer in the console
pprint.pprint(data, compact=True)
import numpy as np
import csv
import os
import matplotlib.pyplot as plt
import data_path
from read_data import readCSV

path = os.path.join(data_path.get_data_path(), "调用链指标", "Order_by_cmdid")


def draw(value, title):
    """Plot *value* as a red line titled *title* and show the figure."""
    plt.plot(value, color='r')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    fileName = "docker_003.csv"
    filepath = os.path.join(path, fileName)
    res = np.array(readCSV(filepath))
    print(res.shape)
    print(1)
    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24
    # (AttributeError on modern NumPy); the builtin float is the
    # documented replacement and yields float64 here.
    value = res[:, 2].astype(float)
    draw(value, title=fileName.split(".")[0] + "_" + "csf_001")