# NOTE: earlier experiments used alternative estimators here (SVC with an RBF
# kernel, AdaBoost with 150 SAMME.R estimators, and a 200-estimator Bagging
# classifier); `clf` is whatever estimator was configured above this chunk.

# Fit the configured classifier on the training fold.
clf.fit(train_in, train_out)

# Predictions and class probabilities for both splits.
test_predict = clf.predict(test_in)
proba_test = clf.predict_proba(test_in)
train_predict = clf.predict(train_in)
proba_train = clf.predict_proba(train_in)

# Training-fold metrics; column 1 of predict_proba is P(positive class).
train_metrics, train_counts = ann.evaluatemodel(
    train_out, train_predict, proba_train[:, 1])
evaluate_train.extend(train_metrics)
prenum_train.extend(train_counts)

# Held-out test-fold metrics.
test_metrics, test_counts = ann.evaluatemodel(
    test_out, test_predict, proba_test[:, 1])
evaluate_test.extend(test_metrics)
prenum_test.extend(test_counts)

# Collect the test-fold metrics into a table and draw a box plot.
Result_test = pd.DataFrame(
    evaluate_test,
    columns=['TPR', 'SPC', 'PPV', 'NPV', 'ACC', 'AUC', 'BER'])
# Result_test.to_csv('BER_LR_ks.csv')
Result_test.boxplot()
plt.show()
# Aggregate the ensemble's per-model probabilities into one probability per
# test sample, consistent with the majority-vote label in `sum_pre`.
test_predict = sum_pre  # majority-vote predictions for the test set

sum_prob = []
n_samples = np.shape(test_in)[0]
for j in range(n_samples):
    colj = predict_prob[:, j]  # each ensemble member's P(positive) for sample j
    # Average only the probabilities that agree with the voted label.
    agreeing = colj[colj >= 0.5] if sum_pre[j] == 1 else colj[colj < 0.5]
    if agreeing.size > 0:
        tmp_prob = np.mean(agreeing)
    else:
        # No member agrees with the vote (possible with ties/rounding).
        # Fall back to the mean over all members instead of letting
        # np.mean([]) emit NaN. (The original debug check
        # `tmp_prob == np.nan` could never fire: NaN != NaN.)
        tmp_prob = np.mean(colj)
    sum_prob.append(tmp_prob)
proba_test = np.array(sum_prob)  # aggregated test-set probabilities

# Test-set metrics from the voted labels and aggregated probabilities.
test3, test4 = ann.evaluatemodel(test_out, test_predict, proba_test)
evaluate_test.extend(test3)
prenum_test.extend(test4)

# Persist per-fold test metrics and draw a box plot.
Result_test = pd.DataFrame(
    evaluate_test,
    columns=['TPR', 'SPC', 'PPV', 'NPV', 'ACC', 'AUC', 'BER'])
Result_test.to_csv('BER_LR_ks.csv')
Result_test.boxplot()
plt.show()

# Append the per-metric mean and std as the final two rows.
mean_test = np.mean(evaluate_test, axis=0)
std_test = np.std(evaluate_test, axis=0)
evaluate_test.append(mean_test)
evaluate_test.append(std_test)
evaluate_test = np.array(evaluate_test)
# Cross-validated evaluation of a single-feature threshold classifier.
for train_idx, test_idx in skf.split(dataMat, labelMat):
    print("%s %s" % (train_idx, test_idx))
    train_in, test_in = dataMat[train_idx], dataMat[test_idx]
    train_out, test_out = labelMat[train_idx], labelMat[test_idx]

    # Only one feature: reshape to a column vector before oversampling.
    train_in = train_in.reshape(-1, 1)
    train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out)

    # Learn a decision threshold on the balanced training fold and keep it.
    thre_tmp = findthresh(train_in, train_out)
    thre.append(thre_tmp)

    train_predict = binaryclassify(train_in, thre_tmp)
    test_predict = binaryclassify(test_in, thre_tmp)

    # The raw feature value itself serves as the ranking score for AUC.
    fold_train_metrics, _ = ann.evaluatemodel(train_out, train_predict, train_in)
    evaluate_train.extend(fold_train_metrics)
    fold_test_metrics, _ = ann.evaluatemodel(test_out, test_predict, test_in)
    evaluate_test.extend(fold_test_metrics)

# Tabulate the test-fold metrics, save them, and draw a box plot.
Result_test = pd.DataFrame(
    evaluate_test,
    columns=['TPR', 'SPC', 'PPV', 'NPV', 'ACC', 'AUC', 'BER'])
Result_test.to_csv('BER/BER_MEWS.csv')
Result_test.boxplot()
plt.show()

# Append per-metric mean and std for the training folds.
mean_train = np.mean(evaluate_train, axis=0)
std_train = np.std(evaluate_train, axis=0)
evaluate_train.append(mean_train)
evaluate_train.append(std_train)
# 10-fold stratified cross-validation of the ANN classifier.
skf = StratifiedKFold(n_splits=10)
kfold = 1
for train_idx, test_idx in skf.split(dataMat, labelMat):
    print("第%s 次交叉验证:" % kfold)
    train_in, test_in = dataMat[train_idx], dataMat[test_idx]
    train_out, test_out = labelMat[train_idx], labelMat[test_idx]

    # Balance the training fold by random oversampling.
    train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out)

    # Train the ANN and predict on both splits.
    train_predict, test_predict, proba_train, proba_test = ann.ANNClassifier(
        neuo, train_in, train_out, test_in)
    # Keep only the positive-class probability column.
    proba_train = proba_train[:, 1]
    proba_test = proba_test[:, 1]

    # Metrics on the (oversampled) training fold.
    train_metrics, train_counts = ann.evaluatemodel(
        train_out, train_predict, proba_train)
    evaluate_train.extend(train_metrics)  # metric percentages
    prenum_train.extend(train_counts)     # confusion-matrix counts

    # Metrics on the held-out test fold.
    test_metrics, test_counts = ann.evaluatemodel(
        test_out, test_predict, proba_test)
    evaluate_test.extend(test_metrics)
    prenum_test.extend(test_counts)

    kfold += 1  # next CV round

# Save the test-fold metrics and draw a box plot.
Result_test = pd.DataFrame(
    evaluate_test,
    columns=['TPR', 'SPC', 'PPV', 'NPV', 'ACC', 'AUC', 'BER'])
Result_test.to_csv('BER/BER_ANN_ks.csv')
Result_test.boxplot()