Example #1
def f_score(y_true, y_pred, N):
    '''
    :param y_true: ground-truth labels
    :param y_pred: predicted labels
    :param N: the beta (weighting) parameter of the F-score
    :return result: the computed F-score
    '''
    # p and r are assumed to be precision and recall helpers imported elsewhere in the source
    num = (1 + N * N) * p(y_true, y_pred) * r(y_true, y_pred)
    deno = N * N * p(y_true, y_pred) + r(y_true, y_pred)
    result = num / deno
    return result
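If p and r are sklearn's precision_score and recall_score (an assumption; the snippet's imports are not shown), this is the standard F-beta formula and should agree with sklearn.metrics.fbeta_score at beta=N. A minimal check under that assumption:

# Minimal check, assuming p and r are sklearn's precision_score and recall_score.
from sklearn.metrics import precision_score as p, recall_score as r, fbeta_score

y_true = [0, 1, 1, 0, 1]
y_pred = [0, 1, 0, 0, 1]
print(f_score(y_true, y_pred, 2))           # F-score from the helper above
print(fbeta_score(y_true, y_pred, beta=2))  # should print the same value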
Example #2
def simple_linear_regression(df1, df2, df3, df4, k):
    # corr is a helper defined elsewhere; it presumably selects which attribute of df1 to regress on
    indep = corr(df1, df2, k)
    independent_attr = df1[indep].values.reshape(len(df1), 1)
    dependent_attr = df2.values.reshape(len(df2), 1)
    regr = LinearRegression()
    regr.fit(independent_attr, dependent_attr)
    predicted_values = regr.predict(independent_attr)
    error = 0
    for i in range(len(independent_attr)):
        error += ((predicted_values[i][0] - dependent_attr[i][0])**2)
    print(indep)
    rmse = (error / len(independent_attr))**.5  # average the squared errors before the square root
    print("For train data, RMSE =", rmse)
    print("R^2 score = ", r(dependent_attr, predicted_values))

    test_data = df3[indep].values.reshape(len(df3), 1)  # df3 holds the test features (X_test was undefined here)
    test_predict = regr.predict(test_data)

    error = 0
    for i in range(len(test_data)):
        error += ((test_predict[i][0] - df4.values[i])**2)

    rmse = (error / len(test_data))**0.5
    print("For test data, RMSE =", rmse)

    plt.scatter(test_data, df4.values, color="cyan", alpha=1, s=1)
    plt.plot(test_data, test_predict, "r:")
    plt.show()
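The corr helper, LinearRegression, r, and plt come from code outside the snippet; below is a self-contained driving sketch, assuming r is sklearn's r2_score and that corr returns the name of the df1 column most correlated with the target. The stand-in corr and the synthetic data are illustrative assumptions, not the original project's code.

# Illustrative driver; the stand-in corr and the synthetic data are assumptions.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score as r

def corr(features, target, k):
    # Stand-in: pick the feature most correlated (in absolute value) with the
    # target; the original helper presumably does something similar using k.
    return features.corrwith(target).abs().idxmax()

rng = np.random.default_rng(0)
X = pd.DataFrame({"a": rng.normal(size=100), "b": rng.normal(size=100)})
y = pd.Series(3 * X["a"] + rng.normal(scale=0.1, size=100))
simple_linear_regression(X.iloc[:80], y.iloc[:80], X.iloc[80:], y.iloc[80:], k=1)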
Example #3
        STD = mean_and_std.loc[data.columns[0], "Std"]
        N = attribute_N.loc[data.columns[0], "N"]
        label = data.iloc[299: -1, 1].values

        y_pred = [1 if gap > MEAN + N*STD else 0 for gap in gaps]
        # Print the per-column prediction info
        aims[299:-1, read_file_list.index(file)] = y_pred
        y_pred = filter(y_pred)  # filter is a post-processing helper defined elsewhere (it shadows the built-in filter)
        print("---------------------")
        print(data.columns[0], " prediction accuracy after adjustment:")
        print("N = ", N)
        print("mean = ",MEAN)
        print("std = ",STD)
        print("Test acc score: {:.6f}".format(ac(label, y_pred)))
        print("Test p score: {:.6f}".format(p(label, y_pred)))
        print("Test r score: {:.6f}".format(r(label, y_pred)))
        print("confusion matrix:")
        print(confusion_matrix(label, y_pred))

    # Compute and store the final threshold
    aims = np.sum(aims, axis=1)
    f = open('./data/end_N.txt')
    end_N = int(float(f.read()))
    f.close()
    # F1 score
    end_N = 5  # note: this hard-coded value overrides the end_N just read from the file
    aim_label = [1 if aim > end_N else 0 for aim in aims]

    # Print the output and store the threshold
    print("---------------------")
Example #4
    print("训练集样本大小为:", train_x.shape[0])
    print("训练集正常样本大小为:", train_x.shape[0] - np.sum(train_y))
    print("训练集异常样本大小为:", np.sum(train_y))
    print("测试集样本大小为:", test_x.shape[0])
    print("测试集正常样本大小为:", test_x.shape[0] - np.sum(test_y))
    print("测试集异常样本大小为:", np.sum(test_y))

    # Train and save the model
    k_means = KMeans(n_clusters=2)
    k_means.fit(train_x, train_y)  # KMeans ignores y; the labels are passed only for API consistency
    joblib.dump(k_means, "kmeans_model.pkl")

    # Predict
    y_pred = k_means.predict(test_x)
    print("--------------------")
    print("预测结果为:")
    print("Test acc score: {:.6f}".format(ac(test_y, y_pred)))
    print("Test p score: {:.6f}".format(p(test_y, y_pred)))
    print("Test r score: {:.6f}".format(r(test_y, y_pred)))
    print("confusion matrix:")
    print(confusion_matrix(test_y, y_pred))
    '''
    Prediction results:
    Test acc score: 0.549053
    Test p score: 0.023267
    Test r score: 0.416620
    confusion matrix:
    [[76638 62088]
     [ 2071  1479]]
    '''
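KMeans assigns arbitrary cluster IDs, so cluster 1 is not guaranteed to correspond to the anomaly class, and part of the low precision above may simply be an unaligned labelling. Below is a small sketch that flips the IDs when the flipped assignment agrees with the ground truth more often; the helper is an addition, not part of the original snippet.

# Sketch: align arbitrary KMeans cluster IDs with the ground-truth classes
# before scoring (an assumed post-processing step, not original code).
import numpy as np

def align_clusters(y_true, cluster_ids):
    cluster_ids = np.asarray(cluster_ids)
    agree = np.mean(cluster_ids == np.asarray(y_true))
    return cluster_ids if agree >= 0.5 else 1 - cluster_ids

y_pred_aligned = align_clusters(test_y, y_pred)
print(confusion_matrix(test_y, y_pred_aligned))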
Example #5
for i in range(1, 13):
    Xt = PCA(n_components=i).fit_transform(X)
    pf = PolynomialFeatures(degree=2)  # degree can be changed to 1, 3, 4, 5, ...; degree=2 gave the best model
    polyXt = pf.fit_transform(Xt)  # note: polyXt is not used below; the model is fit on Xt
    x_train, x_test, y_train, y_test = train_test_split(
        Xt, Y, test_size=0.3, random_state=42)
    y_test = np.array(y_test).reshape(-1, 1)
    model = LinearRegression().fit(x_train, y_train)
    y_pred = model.predict(x_test).reshape(-1, 1)
    score = r(y_test, y_pred)
    rmse = mse(y_pred, y_test)**0.5
    l2Score["orig"].append([score, rmse])

    Xt = PCA(n_components=i).fit_transform(X_norm)
    pf = PolynomialFeatures(degree=2)
    polyXt = pf.fit_transform(Xt)  # again computed but not used below
    x_train, x_test, y_train, y_test = train_test_split(
        Xt, Y, test_size=0.3, random_state=42)
    y_test = np.array(y_test).reshape(-1, 1)
    model = LinearRegression().fit(x_train, y_train)
    y_pred = model.predict(x_test).reshape(-1, 1)
    score = r(y_test, y_pred)
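In both passes polyXt is built but never used: the split and fit run on Xt, so the PolynomialFeatures step has no effect on the scores. If the degree-2 expansion was the intent, the expanded matrix would be split instead; a sketch of that variant (an assumption about intent, not the original code):

# Variant sketch (assumption about intent): fit on the degree-2 expansion.
x_train, x_test, y_train, y_test = train_test_split(
    polyXt, Y, test_size=0.3, random_state=42)
model = LinearRegression().fit(x_train, y_train)
y_pred = model.predict(x_test).reshape(-1, 1)
score = r(np.array(y_test).reshape(-1, 1), y_pred)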
Example #6
for i in range(len(read_file_list)):
    data = pd.read_csv(read_file_list[i], engine="python")["Class"]
    sources[:, i] = data.values
sources = np.sum(sources, axis=1)
source_label = [1 if source > 0 else 0 for source in sources]  # a row counts as anomalous if any file flags it

num_3 = np.load("num_3.txt.npy")
num_N = np.load("num_N.txt.npy")
one_3 = np.load("one_3.txt.npy")
one_N = np.load("one_N.txt.npy")

print("--------------------------------------------")
print("num_3")
print("Test acc score: {:.6f}".format(ac(source_label, num_3)))
print("Test p score: {:.6f}".format(p(source_label, num_3)))
print("Test r score: {:.6f}".format(r(source_label, num_3)))
data = pd.DataFrame()
data["y_true"] = source_label
data["y_pred"] = num_3
print("TP", data[(data["y_pred"] == 1) & (data["y_true"] == 1)].shape[0])
print("FP", data[(data["y_pred"] == 1) & (data["y_true"] == 0)].shape[0])

print("--------------------------------------------")
print("num_N")
print("Test acc score: {:.6f}".format(ac(source_label, num_N)))
print("Test p score: {:.6f}".format(p(source_label, num_N)))
print("Test r score: {:.6f}".format(r(source_label, num_N)))
data = pd.DataFrame()
data["y_true"] = source_label
data["y_pred"] = num_N
print("TP", data[(data["y_pred"] == 1) & (data["y_true"] == 1)].shape[0])
Example #7
        index = np.where(acc == acc.max())
        accuracies.append(acc.max())
        best_threshold.loc[data.columns[0], "N"] = threshold[index[0][0]]

        # Print prediction info
        N = threshold[index[0][0]]
        y_pred = [1 if gap > MEAN + N * STD else 0 for gap in gaps]
        # y_pred = filter(y_pred)
        print("---------------------")
        print(data.columns[0], " prediction accuracy after adjustment:")
        print("N = ", N)
        print("mean = ", MEAN)
        print("std = ", STD)
        print("Test acc score: {:.6f}".format(ac(label, y_pred)))
        print("Test p score: {:.6f}".format(p(label, y_pred)))
        print("Test r score: {:.6f}".format(r(label, y_pred)))
        print("confusion matrix:")
        print(confusion_matrix(label, y_pred))

        # Plot
        plt.switch_backend('agg')
        plt.subplot(2, 1, 1)
        plt.plot(tmp_y, c="b")
        plt.plot(result[:, 0], c="r")
        plt.title(data.columns[0])
        plt.subplot(2, 1, 2)
        plt.plot(gaps, c="b")
        plt.plot((MEAN + N * STD) * np.ones(len(gaps)))
        plt.savefig("./single_result_picture/" + data.columns[0] + ".png")

    # Store