def getKpis(files=None, path=None, kpis=None):
    """Load KPI curve data into memory.

    Args:
        files: optional list of file names, e.g. ["dcos_docker.csv"];
            defaults to every entry under *path*.
        path: directory holding the files; defaults to
            <data_path>/平台指标.
        kpis: optional dict to accumulate results into; a fresh dict is
            created per call when omitted.

    Returns:
        dict mapping "cmdb_id,name,bomc_id,itemid" -> list of rows, where
        each row is [itemid, name, bomc_id, timestamp, value, cmdb_id].
    """
    # FIX: original signature used `kpis={}` — a mutable default shared
    # across calls, so successive calls silently accumulated into one dict.
    if kpis is None:
        kpis = {}
    if not path:
        path = os.path.join(data_path.get_data_path(), "平台指标")
    files = os.listdir(path) if not files else files
    for f in files:
        p = os.path.join(path, f)
        if not os.path.isfile(p):  # skip sub-directories
            continue
        data = readCSV(p)
        # skip the header row; columns are
        # [itemid, name, bomc_id, timestamp, value, cmdb_id]
        for row in data[1:]:
            key = "%s,%s,%s,%s" % (row[5], row[1], row[2], row[0])
            kpis.setdefault(key, []).append(row)
    return kpis
def build_trace(path, res=None):
    """Merge the per-type trace CSV files under *path* into one trace dict.

    Args:
        path: directory holding the trace CSV files.
        res: optional dict to accumulate into; created fresh when omitted.

    Returns:
        dict {traceId: {"startTime": str, "spans": {spanId: span}}}.
    """
    # FIX: original signature used `res={}` — a mutable default shared
    # across calls; kept keyword-compatible but now created per call.
    if res is None:
        res = {}
    print("开始trace数据合并!")
    merge_time = []
    for day in datestamps:
        # NOTE(review): `day` only affects the log message below — the same
        # file path is read on every day iteration; confirm this is intended.
        for traceName in traceNames:
            p = os.path.join(path, traceName + ".csv")
            time1 = time.time()
            print("正在读取文件 " + traceName)
            temp = readCSV(p)
            print("读取" + traceName + "完毕,开始生成trace")
            for i in tqdm(range(len(temp) - 1), desc=traceName, ncols=100,
                          ascii=' =', bar_format='{l_bar}{bar}|'):
                # columns: 0 callType, 1 startTime, 2 elapsedTime, 3 success,
                #          4 traceId, 5 id, 6 pid, 7 cmdb_id, 8 serviceName
                row = temp[i + 1]
                if len(row) <= 3:  # skip malformed/short rows
                    continue
                # build a span from the row and file it under its trace
                span = generate_span(row)
                traceId, span_id = row[4], row[5]
                if res.get(traceId) is None:
                    res[traceId] = {
                        "startTime": span["timestamp"],
                        "spans": {}
                    }
                res[traceId]["spans"][span_id] = span
            time_spend = time.time() - time1
            merge_time.append(time_spend)
            print("文件" + traceName + "_" + day + ".csv " + "合并完毕,共花费 " +
                  str(time_spend) + "S")
    print("Trace 合并完毕!共花费 " + str(sum(merge_time)) + "S,分别是", merge_time)
    return res
def order(path):
    """Sort every CSV under *path* by its second column and write the
    result to `<name>_sorted.csv` inside the same directory.

    Args:
        path: directory containing the CSV files to sort.
    """
    for file in os.listdir(path):
        temp = readCSV(os.path.join(path, file))
        header = temp[0]
        rows = sorted(temp[1:], key=lambda x: x[1])
        new_name = file.split(".")[0] + "_sorted"
        # FIX: output path was built with `path + new_name` (no separator),
        # which wrote "<path><name>_sorted.csv" outside the target directory.
        out_path = os.path.join(path, new_name + ".csv")
        with open(out_path, 'w', newline="") as fd:
            writer = csv.writer(fd)
            writer.writerow(header)
            for row in tqdm(rows, desc=file, ncols=100, ascii=' =',
                            bar_format='{l_bar}{bar}|'):
                writer.writerow(row)
        print("文件" + file + "排序完成")
def divide_file(path, save_path=None):
    """Split each metric CSV under *path* into one file per indicator name.

    Args:
        path: directory containing the source CSV files.
        save_path: output root directory; defaults to *path*.

    Returns:
        str: the output root directory (save_path).
    """
    files = os.listdir(path)
    save_path = path if not save_path else save_path
    for file_name in files:
        # only process csv files; skip sub-directories and other entries
        if "csv" not in file_name:
            continue
        rows = readCSV(os.path.join(path, file_name))[1:]  # drop header
        # derive the sub-directory prefix (db / docker / os / redis):
        # dcos_* files take the second token, redis files are forced to "redis"
        new_dir = file_name.split(
            '_')[0] if 'dcos' not in file_name else re.split(
            '[_.]', file_name)[1]
        new_dir = 'redis' if 'redis' in file_name else new_dir
        new_path = os.path.join(save_path, new_dir)
        # FIX: exists()+mkdir() was race-prone; makedirs(exist_ok=True)
        # is atomic with respect to the existence check.
        os.makedirs(new_path, exist_ok=True)
        # group the rows by indicator name (column 1)
        # FIX: the per-indicator list was previously bound to `data`,
        # shadowing the outer file data; renamed for clarity.
        data_of_indicators = {}
        for row in rows:
            data_of_indicators.setdefault(row[1], []).append(row)
        print(file_name)
        for indicator_name, indicator_rows in tqdm(
                data_of_indicators.items(), desc="写入文件中", ncols=100,
                ascii=' #', bar_format='{l_bar}{bar}|'):
            p = os.path.join(new_path, indicator_name + ".csv")
            with open(p, 'w', newline="") as fd:
                csv.writer(fd).writerows(indicator_rows)
    return save_path
def get_kpis_for_an_indicator(timeStamp, cmd_id, bomc_id, sample_period, file_path):
    """Return, for one indicator file, the KPI value whose timestamp is
    closest to *timeStamp* within +/- *sample_period*.

    Args:
        timeStamp: target epoch timestamp (int).
        cmd_id: host identifier, e.g. "docker_007" (matched against column 5).
        bomc_id: indicator id to match (column 2).
        sample_period: half-width of the time window searched around timeStamp.
        file_path: CSV file for the indicator; rows are cached in the
            module-level `file_now` dict after the first read.

    Returns:
        dict {"(itemid,name,bomc_id)": value}; empty dict when the file
        is missing or empty.
    """
    # load and time-sort the file once, then serve from the cache
    if file_now.get(file_path) is None:
        csv_file = readCSV(file_path)
        if not csv_file:
            return {}
        rows = sorted(csv_file[1:], key=lambda x: x[3])
        file_now[file_path] = rows
    else:
        rows = file_now[file_path]
    valueJson = {}
    timeJson = {}
    low_index = binarySearch(rows, timeStamp - sample_period, 0, len(rows) - 1)
    high_index = binarySearch(rows, timeStamp + sample_period, 0, len(rows) - 1)
    # row columns: itemid, name, bomc_id, timestamp, value, cmdb_id
    for i in range(low_index, high_index):
        row = rows[i]
        # FIX: this local was previously named `time`, shadowing the stdlib
        # time module within the function body.
        time_diff = abs(int(row[3]) - timeStamp)
        if row[5] == cmd_id and bomc_id == row[2]:
            new_key = '(%s,%s,%s)' % (row[0], row[1], row[2])
            if valueJson.get(new_key) is not None:
                if time_diff < timeJson[new_key]:
                    valueJson[new_key] = row[4]
                    timeJson[new_key] = time_diff
                else:
                    # rows are time-sorted, so the distance to timeStamp
                    # shrinks then grows; the first increase means we have
                    # already passed the closest sample
                    break
            else:
                valueJson[new_key] = row[4]
                timeJson[new_key] = time_diff
    return valueJson
from read_data import readCSV
import pprint

# Load the csv data set; per the loop below, readCSV here returns a
# dict of {column_name: values}.
data = readCSV('mtcars.csv')

# Expose each column as a module-level variable.
# FIX: the original used exec(k + '=' + str(v)), which breaks whenever a
# value's str() is not a valid Python literal (e.g. strings with quotes)
# and is a code-injection risk if column names are untrusted. Assigning
# into globals() achieves the same binding without round-tripping through
# source code.
for k, v in data.items():
    globals()[k] = v

# pretty print to make the dictionary look nicer in the console
pprint.pprint(data, compact=True)
import numpy as np
import csv
import os
import matplotlib.pyplot as plt
import data_path
from read_data import readCSV

path = os.path.join(data_path.get_data_path(), "调用链指标", "Order_by_cmdid")


def draw(value, title):
    """Plot *value* as a red line titled *title* and show the figure."""
    plt.plot(value, color='r')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    fileName = "docker_003.csv"
    filepath = os.path.join(path, fileName)
    res = np.array(readCSV(filepath))
    print(res.shape)
    print(1)
    # FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24
    # (AttributeError on modern NumPy); the builtin float is the
    # documented replacement and yields float64 here.
    value = res[:, 2].astype(float)
    draw(value, title=fileName.split(".")[0] + "_" + "csf_001")