Ejemplo n.º 1
0
def find_colors(train_Y):
    """Translate class labels into matplotlib colour codes.

    Every label that compares equal to 0 is mapped to ``'c'``; any other
    label is mapped to ``'r'``.

    Args:
        train_Y: Iterable of class labels.

    Returns:
        A list of colour codes, one per label, in iteration order.
    """
    return ['c' if label == 0 else 'r' for label in train_Y]


# Script section: resample (X, Y) with three oversamplers and draw scatter
# plots of the original and resampled data, coloured via find_colors().
# X, Y, SMOTE, CCR, dbscan_based and plt are defined outside this section.

# find_params_dbscan(X,Y,1)

# Each fit_sample call returns a (features, labels) pair for the resampled set.
X1, Y1 = SMOTE(k_neighbors=40).fit_sample(X, Y)
X2, Y2 = CCR(energy=3).fit_sample(X, Y)
X3, Y3 = dbscan_based.DbscanBasedOversample(eps=1,
                                            min_pts=3,
                                            outline_radio=0.9).fit_sample(
                                                X, Y)

# Square 5x5 figures for every plot that follows.
plt.rcParams['figure.figsize'] = (5.0, 5.0)

# Plot 1: the original data, using the first two feature columns as x/y.
plt.scatter(X[:, 0], X[:, 1], c=find_colors(Y))
# Hide the axes, ticks and tick labels so only the points are drawn.
plt.axis('off', )
plt.xticks([])
plt.yticks([])
# The figure is written to disk before it is displayed.
plt.savefig('myplot20_0.png')
#ax0.set_title('Original dataset')
plt.show()

# Plot 2: the SMOTE-resampled data (X1, Y1).
plt.scatter(X1[:, 0], X1[:, 1], c=find_colors(Y1))
# NOTE(review): the disabled title below says 'CCR', but X1/Y1 come from
# SMOTE above (CCR output is X2/Y2) - confirm which plot was intended.
#ax1.set_title('CCR')
plt.axis('off')
plt.xticks([])
plt.yticks([])
    'adult': (1.6, 3),
    'breast-cancer-wisconsin': (0.5, 3),
    'haberman': (0.14, 3)
}

# Sweep the DBSCAN-based oversampler's eps over 21 evenly spaced values in
# [0.05, 5] and record the evaluation scores obtained for each value.
eps_es = np.linspace(0.05, 5, 21)
# print(eps

# NOTE(review): min_pts is not used by the loop below, which hard-codes
# min_pts=3 - confirm whether a sweep over these values was intended.
min_pts = [2, 3, 5, 7]
# One list per metric; entry i corresponds to eps_es[i].
precisions, recall, f1, auc_score = [], [], [], []
for eps in eps_es:
    # A fresh 3-nearest-neighbours classifier is created for every eps value.
    model = KNeighborsClassifier(n_neighbors=3)
    list1 = compare_different_oversample_method(
        model,
        dbscan_based.DbscanBasedOversample(eps=eps,
                                           min_pts=3,
                                           filter_majority=False), X, Y)
    # Presumably list1 has the layout sketched below (precision, recall, f1,
    # auc), each rounded to 3 decimals - verify against the helper.
    # np.array([round(precision, 3), round(recall, 3), round(f1, 3),round(auc_score,3)])
    precisions.append(list1[0])
    recall.append(list1[1])
    f1.append(list1[2])
    auc_score.append(list1[3])

plt.plot(
    eps_es,
    f1,
    linestyle='--',  # 折线类型
    linewidth=2,  # 折线宽度
    color='c',  # 折线颜色
    marker='^',  # 点的形状
    markersize=10,  # 点的大小
Ejemplo n.º 3
0
 # Fragment of a function body (its def line is not visible here): evaluate
 # one classifier with compare_different_oversample_method() under several
 # oversampling strategies. X, Y, eps and min_pts come from an enclosing
 # scope that is outside this fragment.
 #model = DecisionTreeClassifier(max_depth=5, min_samples_split=3)
 # from sklearn.naive_bayes import GaussianNB
 # model=GaussianNB()
 from sklearn.linear_model import LogisticRegression
 # Classifier under test: logistic regression with default settings.
 model = LogisticRegression()
 model_name = 'LR'
 # NOTE(review): i is not read anywhere in the visible fragment.
 i = 4
 import time
 # Only the baseline run below is timed (wall-clock, via time.time()).
 time1=time.time()
 print()
 # Baseline: None is passed in place of an oversampler; the message printed
 # afterwards reads "computation without oversampling finished, elapsed: {}".
 res_list1 = compare_different_oversample_method(model, None, X, Y)
 time2=time.time()
 print('没有过采样计算完成,用时:{}'.format(time2-time1))
 # DBSCAN-based oversampler configured with the caller-supplied eps/min_pts.
 res_list2 = compare_different_oversample_method(model, dbscan_based.DbscanBasedOversample(eps=eps, min_pts=min_pts,
                                                                                           outline_radio=0.5,
                                                                                           noise_radio=0.1,
                                                                                          # fit_outline_radio=False,
                                                                                           filter_majority=False), X,
                                                 Y)
 # Each progress message below reads "<method> computation finished".
 print('dbscan计算完成')
 # CCR oversampler with its default parameters.
 res_list3 = compare_different_oversample_method(model, CCR(), X, Y)
 print('ccr计算完成')
 # The remaining methods are selected by string name; presumably the helper
 # maps each name to an oversampler - verify against its definition.
 res_list4 = compare_different_oversample_method(model, 'smote', X, Y)
 print('smote计算完成')
 res_list5 = compare_different_oversample_method(model, 'borderline_smote', X, Y)
 print('borderline_smote计算完成')
 res_list6 = compare_different_oversample_method(model, 'adasyn', X, Y)
 print('adasyn计算完成')
 res_list7 = compare_different_oversample_method(model, 'SMOTE_ENN', X, Y)
 print('SMOTE_ENN计算完成')
 # Report the baseline (no oversampling) result under the label 'None'.
 print_result('None', res_list1)
def find_params_dbscan(train_X, train_Y, c, eps=0.6, min_samples=10):
    """Cluster the samples of one class with DBSCAN and plot the clusters.

    Helper for picking DBSCAN parameters by eye: it prints the number of
    clusters found and the size of each label group, then shows a scatter
    plot of the first two feature columns coloured by cluster label.

    Args:
        train_X: Feature matrix; assumed to be a 2-D NumPy array with at
            least two columns (it is boolean-masked and column-indexed).
        train_Y: Label vector aligned with ``train_X``; assumed to be a NumPy
            array so that ``train_Y == c`` yields a boolean mask.
        c: The class label whose samples are clustered.
        eps: DBSCAN neighbourhood radius. Defaults to 0.6, the value that
            was previously hard-coded.
        min_samples: DBSCAN ``min_samples``. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        None. Results are printed and plotted.
    """
    # Earlier note kept from the original: "0.36 2" (presumably an eps /
    # min_samples pair that was tried before - TODO confirm).
    class_X = train_X[train_Y == c]
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(class_X)
    # DBSCAN numbers clusters 0..k-1 and marks noise as -1, so max + 1 is the
    # cluster count. The printed text reads "number of clusters: {}".
    print('簇的个数:{}'.format(max(labels) + 1))
    # Size of every label group, including the noise label if present.
    print(Counter(labels))
    plt.scatter(class_X[:, 0], class_X[:, 1], c=labels)
    plt.show()


if __name__ == '__main__':
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    plt.show()
    #find_params_dbscan(X,Y,c=1)
    X1, Y1 = dbscan_based.DbscanBasedOversample(eps=0.8,
                                                min_pts=5).fit_sample(X, Y)
    X2, Y2 = SMOTE(k_neighbors=10).fit_sample(X, Y)
    plt.rcParams['figure.figsize'] = (13.0, 4.0)
    fig = plt.figure()

    ax1 = fig.add_subplot(1, 3, 1)
    ax1.scatter(X[:, 0], X[:, 1], c=Y)
    plt.axis('off')
    plt.xticks([])
    plt.yticks([])
    ax2 = fig.add_subplot(1, 3, 2)
    ax2.scatter(X1[:, 0], X1[:, 1], c=Y1)
    plt.axis('off')
    plt.xticks([])
    plt.yticks([])
    ax3 = fig.add_subplot(1, 3, 3)