def f_score(y_true, y_pred, N):
    """Compute the F-beta score (beta = N) from the module-level helpers.

    :param y_true: ground-truth labels
    :param y_pred: predicted labels
    :param N: beta parameter of the F score (N = 1 gives the usual F1)
    :return: the F-beta score; 0.0 when precision and recall are both zero
    """
    # Call the precision/recall helpers once each instead of twice (the
    # original recomputed both for numerator and denominator).
    precision = p(y_true, y_pred)
    recall = r(y_true, y_pred)
    deno = N * N * precision + recall
    if deno == 0:
        # Guard the degenerate case (no positives predicted and none present)
        # instead of raising ZeroDivisionError.
        return 0.0
    num = (1 + N * N) * precision * recall
    return num / deno
def simple_linear_regression(df1, df2, df3, df4, k):
    """Fit a one-feature linear regression and report train/test error.

    :param df1: training feature frame
    :param df2: training target series
    :param df3: test feature frame
    :param df4: test target series
    :param k: forwarded to the module-level ``corr`` helper when picking
        the single most relevant feature
    """
    # Feature selected by the project-local correlation helper.
    indep = corr(df1, df2, k)
    independent_attr = df1[indep].values.reshape(len(df1), 1)
    dependent_attr = df2.values.reshape(len(df2), 1)
    regr = LinearRegression()
    regr.fit(independent_attr, dependent_attr)
    predicted_values = regr.predict(independent_attr)
    error = 0
    for i in range(len(independent_attr)):
        error += (predicted_values[i][0] - dependent_attr[i][0]) ** 2
    print(indep)
    # NOTE(review): this is the root of the SUM of squared errors, not a true
    # RMSE (no division by n); kept as-is to preserve the reported metric.
    rmse = error ** .5
    print("For train data, RMSE =", rmse)
    print("R^2 score = ", r(dependent_attr, predicted_values))
    # BUG FIX: the original sized these arrays with the undefined global
    # ``X_test``; the test data lives in df3/df4.
    test_data = df3[indep].values.reshape(len(df3), 1)
    test_predict = regr.predict(test_data)
    error = 0
    for i in range(len(df3)):
        error += (test_predict[i][0] - df4.values[i]) ** 2
    rmse = error ** 0.5
    print("For test data, RMSE =", rmse)
    plt.scatter(test_data, df4.values, color="cyan", alpha=1, s=1)
    plt.plot(test_data, test_predict, "r:")
    plt.show()
# Per-attribute threshold parameters looked up by the current column name.
STD = mean_and_std.loc[data.columns[0], "Std"]
N = attribute_N.loc[data.columns[0], "N"]
label = data.iloc[299: -1, 1].values
# Flag a point as anomalous (1) when its gap exceeds MEAN + N standard deviations.
y_pred = [1 if gap > MEAN + N*STD else 0 for gap in gaps]
# Print single-dimension prediction info
aims[299:-1, read_file_list.index(file)] = y_pred
# NOTE(review): `filter` shadows the builtin here -- presumably a
# project-local smoothing/post-processing helper; confirm its definition.
y_pred = filter(y_pred)
print("---------------------")
print(data.columns[0], " 调整之后的预测精度展示:")
print("N = ", N)
print("mean = ",MEAN)
print("std = ",STD)
print("Test acc score: {:.6f}".format(ac(label, y_pred)))
print("Test p score: {:.6f}".format(p(label, y_pred)))
print("Test r score: {:.6f}".format(r(label, y_pred)))
print("confusion matrix:")
print(confusion_matrix(label, y_pred))
# Compute and store the final threshold
source_label = source_label  # NOTE(review): self-assignment is a no-op -- likely leftover code
# Per-row vote count: how many single-dimension detectors flagged the row.
aims = np.sum(aims, axis=1)
f = open('./data/end_N.txt')  # NOTE(review): prefer a `with` block; closed manually below
end_N = int(float(f.read()))
f.close()
# F1 score
# NOTE(review): this hard-coded value overwrites the threshold just read from
# ./data/end_N.txt, making the file read dead code -- confirm which is intended.
end_N = 5
aim_label = [1 if aim > end_N else 0 for aim in aims]
# Print output and store the threshold
print("---------------------")
# Dataset summary: sizes and normal/anomaly counts (labels assumed 0/1 so
# their sum counts anomalies -- confirm upstream encoding).
print("训练集样本大小为:", train_x.shape[0])
print("训练集正常样本大小为:", train_x.shape[0] - np.sum(train_y))
print("训练集异常样本大小为:", np.sum(train_y))
print("测试集样本大小为:", test_x.shape[0])
print("测试集正常样本大小为:", test_x.shape[0] - np.sum(test_y))
print("测试集异常样本大小为:", np.sum(test_y))
# Train and save the model
# NOTE(review): KMeans is unsupervised -- fit() ignores the y argument, and
# the cluster ids it assigns are arbitrary (cluster 1 is not guaranteed to be
# the anomaly class), which may explain the low precision recorded below.
k_means = KMeans(n_clusters=2)
k_means.fit(train_x, train_y)
joblib.dump(k_means, "kmeans_model.pkl")
# Predict
y_pred = k_means.predict(test_x)
print("--------------------")
print("预测结果为:")
print("Test acc score: {:.6f}".format(ac(test_y, y_pred)))
print("Test p score: {:.6f}".format(p(test_y, y_pred)))
print("Test r score: {:.6f}".format(r(test_y, y_pred)))
print("confusion matrix:")
print(confusion_matrix(test_y, y_pred))
# Recorded output from a previous run (kept verbatim as a no-op string literal).
'''
预测结果为:
Test acc score: 0.549053
Test p score: 0.023267
Test r score: 0.416620
confusion matrix:
[[76638 62088]
[ 2071 1479]]
'''
# Sweep PCA dimensionality from 1 to 12 and record regression quality for
# both the raw (X) and normalized (X_norm) feature matrices.
for i in range(1, 13):
    # --- raw features: reduce to i principal components ---
    Xt = PCA(n_components=i).fit_transform(X)
    pf = PolynomialFeatures(
        degree=2
    )  # You can change degree = 1, 3, 4, 5 and so on (degree = 2 is the best model).
    # NOTE(review): polyXt is never used below -- the model is fit on the
    # un-expanded Xt split, so the polynomial expansion is dead code.
    polyXt = pf.fit_transform(Xt)
    # NOTE(review): `t**s` appears to be a mangled `train_test_split` call;
    # as written it parses as `t ** s(...)` and would fail at runtime -- confirm.
    x_train, x_test, y_train, y_test = t**s(Xt, Y, test_size=0.3, random_state=42)
    y_test = np.array(y_test).reshape(-1, 1)
    y_test.reshape(-1, 1)  # NOTE(review): no-op -- the reshaped result is discarded
    model = LinearRegression().fit(x_train, y_train)
    y_pred = model.predict(x_test).reshape(-1, 1)
    score = r(y_test, y_pred)  # presumably an R^2 helper -- confirm `r`'s definition
    rmse = mse(y_pred, y_test)**0.5
    l2Score["orig"].append([score, rmse])
    # --- normalized features: same pipeline on X_norm ---
    Xt = PCA(n_components=i).fit_transform(X_norm)
    pf = PolynomialFeatures(degree=2)
    polyXt = pf.fit_transform(Xt)  # NOTE(review): unused here as well
    x_train, x_test, y_train, y_test = t**s(Xt, Y, test_size=0.3, random_state=42)
    y_test = np.array(y_test).reshape(-1, 1)
    model = LinearRegression().fit(x_train, y_train)
    y_pred = model.predict(x_test).reshape(-1, 1)
    np.array(y_test).reshape(-1, 1)  # NOTE(review): no-op -- result is discarded
    score = r(y_test, y_pred)
# Stack the per-file "Class" columns into the pre-allocated `sources` matrix,
# one column per input file.
for i in range(len(read_file_list)):
    data = pd.read_csv(read_file_list[i], engine="python")["Class"]
    sources[:, i] = data.values
# A row counts as a true anomaly when ANY source file flags it (row sum > 0).
sources = np.sum(sources, axis=1)
source_label = [1 if source > 0 else 0 for source in sources]
# Previously saved predictions from four voting schemes; one_3/one_N are
# loaded but not used in this section -- presumably consumed further down.
num_3 = np.load("num_3.txt.npy")
num_N = np.load("num_N.txt.npy")
one_3 = np.load("one_3.txt.npy")
one_N = np.load("one_N.txt.npy")
print("--------------------------------------------")
print("num_3")
print("Test acc score: {:.6f}".format(ac(source_label, num_3)))
print("Test p score: {:.6f}".format(p(source_label, num_3)))
print("Test r score: {:.6f}".format(r(source_label, num_3)))
# Count true/false positives by cross-tabulating in a throwaway frame
# (note: rebinds `data`, discarding the last CSV column read above).
data = pd.DataFrame()
data["y_true"] = source_label
data["y_pred"] = num_3
print("TP", data[(data["y_pred"] == 1) & (data["y_true"] == 1)].shape[0])
print("FP", data[(data["y_pred"] == 1) & (data["y_true"] == 0)].shape[0])
print("--------------------------------------------")
print("num_N")
print("Test acc score: {:.6f}".format(ac(source_label, num_N)))
print("Test p score: {:.6f}".format(p(source_label, num_N)))
print("Test r score: {:.6f}".format(r(source_label, num_N)))
data = pd.DataFrame()
data["y_true"] = source_label
data["y_pred"] = num_N
print("TP", data[(data["y_pred"] == 1) & (data["y_true"] == 1)].shape[0])
# Pick the threshold multiplier N that maximized accuracy over the sweep and
# record it for this attribute (keyed by column name).
index = np.where(acc == acc.max())
accuracies.append(acc.max())
best_threshold.loc[data.columns[0], "N"] = threshold[index[0][0]]
# Print prediction info
N = threshold[index[0][0]]
# Re-predict with the winning threshold: anomalous (1) when the gap exceeds
# MEAN + N standard deviations.
y_pred = [1 if gap > MEAN + N * STD else 0 for gap in gaps]
# y_pred = filter(y_pred)
print("---------------------")
print(data.columns[0], " 调整之后的预测精度展示:")
print("N = ", N)
print("mean = ", MEAN)
print("std = ", STD)
print("Test acc score: {:.6f}".format(ac(label, y_pred)))
print("Test p score: {:.6f}".format(p(label, y_pred)))
print("Test r score: {:.6f}".format(r(label, y_pred)))
print("confusion matrix:")
print(confusion_matrix(label, y_pred))
# Plot
plt.switch_backend('agg')  # headless backend so savefig works without a display
plt.subplot(2, 1, 1)
plt.plot(tmp_y, c="b")         # presumably the observed series -- confirm tmp_y
plt.plot(result[:, 0], c="r")  # presumably the model output -- confirm result
plt.title(data.columns[0])
plt.subplot(2, 1, 2)
plt.plot(gaps, c="b")
plt.plot((MEAN + N * STD) * np.ones(len(gaps)))  # horizontal decision-threshold line
plt.savefig("./single_result_picture/" + data.columns[0] + ".png")
# Store