コード例 #1
0
ファイル: run.py プロジェクト: microcosmx/algorithms
def for_model_2():
    """Merge each of the three CSV groups on its own and save one file per group.

    For every group the trace, seq and seq-caller CSVs are read with
    ``trace_id`` as the index, the ``test_trace_id`` and ``test_case_id``
    columns are removed from the seq frame, the three frames are combined by
    ``preprocessing_set.merge_data`` and the result is written under ``110/``.
    """
    key_column = "trace_id"
    # (trace CSV, seq CSV, seq-caller CSV, output path), in processing order.
    jobs = (
        ("110/trace_verified_sequence.csv", "110/seq_seq_sequence.csv",
         "110/seq_caller_sequence.csv", "110/model_1_seq_total"),
        ("110/trace_verified_instance3.csv", "110/seq_seq_instance3.csv",
         "110/seq_caller_instance3.csv", "110/model_1_inst_total"),
        ("110/trace_verified_config_2.csv", "110/seq_seq_config.csv",
         "110/seq_caller_config.csv", "110/model_1_config_total"),
    )
    for trace_path, seq_path, caller_path, out_path in jobs:
        trace_frame = pd.read_csv(trace_path, header=0, index_col=key_column)
        seq_frame = pd.read_csv(seq_path, header=0, index_col=key_column)
        # Only the seq frame has these two columns removed before the merge.
        for unwanted in ("test_trace_id", "test_case_id"):
            seq_frame.pop(unwanted)
        caller_frame = pd.read_csv(caller_path, header=0, index_col=key_column)
        # Join the individual parts of the data together.
        merged = preprocessing_set.merge_data(df_trace=trace_frame,
                                              df_seq=seq_frame,
                                              df_seq_caller=caller_frame)
        merged.to_csv(out_path)
コード例 #2
0
ファイル: run.py プロジェクト: microcosmx/algorithms
def preprocessing():
    """Build the combined data set and write it to a single CSV.

    Each part (trace, seq, seq-caller) is read from three CSV files indexed by
    ``trace_id``, chained with ``preprocessing_set.append_data`` and passed
    through ``get_min_data``.  The three parts are then merged, run through
    ``fill_empty_data`` and ``convert_data`` and saved as
    ``ready_use_max_without_sampling_mms.csv``.
    """
    key_column = "trace_id"
    # One row per part; within a row the files are ordered
    # sequence set, instance set, config set.
    part_paths = (
        ("110/trace_verified_sequence.csv",
         "110/trace_verified_instance3.csv",
         "110/trace_verified_config_2.csv"),
        ("110/seq_seq_sequence.csv",
         "110/seq_seq_instance3.csv",
         "110/seq_seq_config.csv"),
        ("110/seq_caller_sequence.csv",
         "110/seq_caller_instance3.csv",
         "110/seq_caller_config.csv"),
    )

    parts = []
    for paths in part_paths:
        frames = [
            pd.read_csv(path, header=0, index_col=key_column) for path in paths
        ]
        stacked = frames[0]
        for extra in frames[1:]:
            stacked = preprocessing_set.append_data(stacked, extra)
        parts.append(get_min_data(stacked))
    trace_part, seq_part, caller_part = parts

    # Join the individual parts of the data together.
    combined = preprocessing_set.merge_data(df_trace=trace_part,
                                            df_seq=seq_part,
                                            df_seq_caller=caller_part)

    # Fill in missing values.
    print("fill")
    combined = preprocessing_set.fill_empty_data(combined)

    # Disabled step: discard the rows without a fault.
    # combined = combined.loc[combined["y_issue_ms"] != "Success"]

    # Convert irregular values into numbers.
    print("convert")
    combined = preprocessing_set.convert_data(combined)

    # Disabled steps: oversample by a label to balance the classes, then
    # shuffle the oversampled data.
    # combined = preprocessing_set.sampling(combined, "y_final_result")
    # combined = shuffle(combined)

    # Write the result.
    print("output")
    combined.to_csv("ready_use_max_without_sampling_mms.csv")
コード例 #3
0
ファイル: evaluation_3.py プロジェクト: FudanSELab/AI_Ops
def _read_indexed_frames(paths, index_col):
    """Read every CSV in *paths* (header on the first row) indexed by *index_col*."""
    return [pd.read_csv(path, header=0, index_col=index_col) for path in paths]


def _append_frames(frames):
    """Chain *frames* left to right with ``preprocessing_set.append_data``.

    *frames* must contain at least one frame; a single frame is returned as is.
    """
    combined = frames[0]
    for frame in frames[1:]:
        combined = preprocessing_set.append_data(combined, frame)
    return combined


def _pop_columns(frame, columns):
    """Remove each of *columns* from *frame* in place (KeyError if absent)."""
    for column in columns:
        frame.pop(column)


def collect_data():
    """Append the merged training data to the production traces and save it.

    Steps, in order:

    1. Read the ten ``production/fN`` combined trace CSVs indexed by
       ``trace_id``, chain them with ``preprocessing_set.append_data`` and
       remove ``test_case_id`` / ``test_trace_id`` from the combined frame.
    2. Read the three training CSV groups under ``110/``.  The two columns are
       removed from every trace and seq frame before appending; the
       seq-caller frames are appended untouched.
    3. Merge the three training parts with ``preprocessing_set.merge_data``.
    4. Append the merged training data to the production data and write
       ``production/df_total_with_f.csv``.

    Returns:
        None.  The only outputs are the CSV file and the progress messages.
    """
    index_col = "trace_id"
    dropped_columns = ("test_case_id", "test_trace_id")

    production_paths = (
        "production/f1/trace_verified_sequence_f1_combined.csv",
        "production/f2/trace_verified_sequence_f2_combined.csv",
        "production/f3/trace_verified_config_f3_combined.csv",
        "production/f4/trace_verified_config_f4_combined.csv",
        "production/f5/trace_verified_config_f5_combined.csv",
        "production/f7/trace_verified_config_f7_combined.csv",
        "production/f8/trace_verified_instance_f8_combined.csv",
        "production/f11/trace_verified_instance_f11_combined.csv",
        "production/f12/trace_verified_instance_f12_combined.csv",
        "production/f13/trace_verified_sequence_f13_combined.csv",
    )

    print("准备读取production数据")
    production_frames = _read_indexed_frames(production_paths, index_col)

    # Progress message kept so the console output is unchanged.  The
    # per-frame ``.index`` lookups that used to follow it were stored in
    # locals that nothing read, so they have been removed.
    print("获取production数据的index")

    print("连接Production数据")
    df_f_all = _append_frames(production_frames)
    _pop_columns(df_f_all, dropped_columns)

    print("读取训练数据")
    # (paths for the sequence / instance / config sets, strip columns?)
    # listed in the order trace, seq, seq-caller.
    training_parts = (
        (("110/trace_verified_sequence.csv",
          "110/trace_verified_instance3.csv",
          "110/trace_verified_config_2.csv"), True),
        (("110/seq_seq_sequence.csv",
          "110/seq_seq_instance3.csv",
          "110/seq_seq_config.csv"), True),
        # The seq-caller frames keep all of their columns.
        (("110/seq_caller_sequence.csv",
          "110/seq_caller_instance3.csv",
          "110/seq_caller_config.csv"), False),
    )
    combined_parts = []
    for paths, strip_columns in training_parts:
        frames = _read_indexed_frames(paths, index_col)
        if strip_columns:
            for frame in frames:
                _pop_columns(frame, dropped_columns)
        combined_parts.append(_append_frames(frames))
    df_total_0, df_total_1, df_total_2 = combined_parts

    # Join the individual parts of the data together.
    df_total = preprocessing_set.merge_data(df_trace=df_total_0,
                                            df_seq=df_total_1,
                                            df_seq_caller=df_total_2)

    print("连接训练数据与Production数据")
    df_total_with_f = preprocessing_set.append_data(df_f_all, df_total)

    df_total_with_f.to_csv("production/df_total_with_f.csv")