def find_colors(train_Y):
    """Translate class labels into matplotlib colour codes.

    A label equal to 0 is drawn in 'c' (cyan); any other label is drawn
    in 'r' (red).

    Args:
        train_Y: iterable of class labels.

    Returns:
        list of colour codes, one per label, in the same order.
    """
    return ['c' if label == 0 else 'r' for label in train_Y]


# find_params_dbscan(X,Y,1)

# Resample the same (X, Y) with three different oversamplers.
# NOTE(review): newer imbalanced-learn releases expose this method as
# `fit_resample` -- confirm against the installed version.
X1, Y1 = SMOTE(k_neighbors=40).fit_sample(X, Y)
X2, Y2 = CCR(energy=3).fit_sample(X, Y)
X3, Y3 = dbscan_based.DbscanBasedOversample(
    eps=1,
    min_pts=3,
    outline_radio=0.9,
).fit_sample(X, Y)

plt.rcParams['figure.figsize'] = (5.0, 5.0)

# Figure 0: the original dataset, with axes and ticks hidden, saved to disk
# before being shown.
plt.scatter(X[:, 0], X[:, 1], c=find_colors(Y))
plt.axis('off', )
plt.xticks([])
plt.yticks([])
plt.savefig('myplot20_0.png')
plt.show()

# Figure 1: the SMOTE-resampled data (X1, Y1).
# NOTE(review): a commented-out title here used to read 'CCR', but X1/Y1 are
# produced by SMOTE above.
plt.scatter(X1[:, 0], X1[:, 1], c=find_colors(Y1))
plt.axis('off')
plt.xticks([])
plt.yticks([])
'adult': (1.6, 3), 'breast-cancer-wisconsin': (0.5, 3), 'haberman': (0.14, 3) } eps_es = np.linspace(0.05, 5, 21) # print(eps min_pts = [2, 3, 5, 7] precisions, recall, f1, auc_score = [], [], [], [] for eps in eps_es: model = KNeighborsClassifier(n_neighbors=3) list1 = compare_different_oversample_method( model, dbscan_based.DbscanBasedOversample(eps=eps, min_pts=3, filter_majority=False), X, Y) # np.array([round(precision, 3), round(recall, 3), round(f1, 3),round(auc_score,3)]) precisions.append(list1[0]) recall.append(list1[1]) f1.append(list1[2]) auc_score.append(list1[3]) plt.plot( eps_es, f1, linestyle='--', # 折线类型 linewidth=2, # 折线宽度 color='c', # 折线颜色 marker='^', # 点的形状 markersize=10, # 点的大小
# Alternative classifiers that were tried here, kept for quick switching:
#   model = DecisionTreeClassifier(max_depth=5, min_samples_split=3)
#   from sklearn.naive_bayes import GaussianNB
#   model = GaussianNB()
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model_name = 'LR'
i = 4
import time

# Baseline run: None is passed in place of an oversampler (the log message
# below reports it as the "no oversampling" run) and the wall-clock time of
# this run is printed.
time1 = time.time()
print()
res_list1 = compare_different_oversample_method(model, None, X, Y)
time2 = time.time()
print('没有过采样计算完成,用时:{}'.format(time2 - time1))

# DBSCAN-based oversampler; eps and min_pts are taken from the surrounding
# script. (A `fit_outline_radio=False` argument was previously commented out.)
dbscan_sampler = dbscan_based.DbscanBasedOversample(
    eps=eps,
    min_pts=min_pts,
    outline_radio=0.5,
    noise_radio=0.1,
    filter_majority=False,
)
res_list2 = compare_different_oversample_method(model, dbscan_sampler, X, Y)
print('dbscan计算完成')

# CCR with its default settings.
res_list3 = compare_different_oversample_method(model, CCR(), X, Y)
print('ccr计算完成')

# The remaining methods are selected by name; the strings are interpreted by
# compare_different_oversample_method.
res_list4 = compare_different_oversample_method(model, 'smote', X, Y)
print('smote计算完成')

res_list5 = compare_different_oversample_method(
    model, 'borderline_smote', X, Y)
print('borderline_smote计算完成')

res_list6 = compare_different_oversample_method(model, 'adasyn', X, Y)
print('adasyn计算完成')

res_list7 = compare_different_oversample_method(model, 'SMOTE_ENN', X, Y)
print('SMOTE_ENN计算完成')

# Report the baseline (no oversampling) result.
print_result('None', res_list1)
def find_params_dbscan(train_X, train_Y, c, eps=0.6, min_samples=10):
    """Cluster the samples of one class with DBSCAN and visualise the result.

    Intended for manually exploring DBSCAN settings: it prints the number of
    clusters found and the size of each label group, then shows a scatter
    plot of the first two feature columns coloured by cluster label.

    Args:
        train_X: 2-D feature array; only columns 0 and 1 are plotted.
        train_Y: label array, compared element-wise against ``c`` to build a
            boolean mask over the rows of ``train_X``.
        c: the class label whose samples are clustered.
        eps: DBSCAN neighbourhood radius. Defaults to 0.6, the value that was
            previously hard-coded.
        min_samples: DBSCAN ``min_samples``. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        None. Output is printed and plotted.
    """
    # NOTE(review): an earlier inline note read "0.36 2" -- presumably another
    # (eps, min_samples) pair that was tried; confirm with the author.
    X = train_X[train_Y == c]
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(X)
    # DBSCAN labels clusters 0..k-1 and marks noise as -1, so max + 1 is the
    # cluster count (0 when every point is noise).
    print('簇的个数:{}'.format(max(labels) + 1))
    print(Counter(labels))
    plt.scatter(X[:, 0], X[:, 1], c=labels)
    plt.show()


if __name__ == '__main__':
    # Quick look at the raw data, coloured by class label.
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    plt.show()
    #find_params_dbscan(X,Y,c=1)

    # Resample with the DBSCAN-based oversampler and with SMOTE.
    X1, Y1 = dbscan_based.DbscanBasedOversample(eps=0.8, min_pts=5).fit_sample(X, Y)
    X2, Y2 = SMOTE(k_neighbors=10).fit_sample(X, Y)

    # Three panels side by side on one wide figure.
    plt.rcParams['figure.figsize'] = (13.0, 4.0)
    fig = plt.figure()

    # Panel 1: original data. The plt.* calls below act on the current axes,
    # i.e. the subplot that was added most recently.
    ax1 = fig.add_subplot(1, 3, 1)
    ax1.scatter(X[:, 0], X[:, 1], c=Y)
    plt.axis('off')
    plt.xticks([])
    plt.yticks([])

    # Panel 2: data after the DBSCAN-based oversampler.
    ax2 = fig.add_subplot(1, 3, 2)
    ax2.scatter(X1[:, 0], X1[:, 1], c=Y1)
    plt.axis('off')
    plt.xticks([])
    plt.yticks([])

    # Panel 3.
    ax3 = fig.add_subplot(1, 3, 3)