import numpy as np
from scipy.sparse import hstack
from sklearn import linear_model, model_selection, svm
from sklearn.preprocessing import OneHotEncoder
import xgboost as xgb

# Local project modules: model evaluation, preprocessing/SMOTE, and the Keras network.
import model_measurement
import processing
import NN


def logistics_mod(X_train, Y_train, X_test, Y_test, drop_feature, combined, show_result, show_COR):
    """Baseline logistic regression on the raw (non-WOE) features."""
    if show_COR:
        model_measurement.plotCOR(X_train, WOE=False)
    clf = linear_model.LogisticRegression(solver='liblinear', C=1.0, tol=1e-6,
                                          verbose=0, fit_intercept=True)  # penalty='l2' is the default
    if combined:
        # With combined features, also drop one dummy level per categorical
        # (presumably to avoid dummy-variable collinearity).
        X_train1 = X_train.drop(drop_feature + ['主叫_离散率组合_(1.0, 1.0)', '归属地是否未知_0.0'], axis=1)
        X_test1 = X_test.drop(drop_feature + ['主叫_离散率组合_(1.0, 1.0)', '归属地是否未知_0.0'], axis=1)
    else:
        X_train1 = X_train.drop(drop_feature, axis=1)
        X_test1 = X_test.drop(drop_feature, axis=1)
    clf.fit(X_train1, Y_train)
    predictions = clf.predict_proba(X_test1)  # per-class probabilities
    model_measurement.plotROC(predictions, Y_test, 'LR', show=show_result)
    if show_result:
        model_measurement.VIF(X_train1, name='X_train1', combined=combined, WOE=False)
        model_measurement.plotKS(predictions, Y_test, 'LR')
        model_measurement.coef(clf, X_train1, Y_train)

def logistics_mod_woe(X_woe_train, X_woe_test, Y_woe_train, Y_woe_test, drop_feature, combined, show_COR, show_result):
    """Logistic regression on WOE-encoded features."""
    if show_COR:
        model_measurement.plotCOR(X_woe_train, WOE=True)
    clf = linear_model.LogisticRegression(solver='liblinear', C=1.0, tol=1e-6,
                                          verbose=0, fit_intercept=True)  # penalty='l2' is the default
    # Drop the few variables whose IV is below 0.02 -- they carry no useful signal.
    X_woe_train1 = X_woe_train.drop(drop_feature, axis=1)
    # X_woe_train1 = X_woe_train.filter(regex='主叫次数|平均通话时间')
    clf.fit(X_woe_train1, Y_woe_train)
    # joblib.dump(clf, "logistics_mod_woe.m"); print('model is saved')
    X_woe_test1 = X_woe_test.drop(drop_feature, axis=1)
    # clf = joblib.load("logistics_mod_woe.m"); print('model is loaded')
    predictions_woe = clf.predict_proba(X_woe_test1)  # per-class probabilities
    # predictions_woe1 = clf.predict(X_woe_test1.values)
    model_measurement.plotROC(predictions_woe, Y_woe_test, 'LR+WOE', show=show_result)
    if show_result:
        model_measurement.VIF(X_woe_train1, name='X_woe_train1', combined=combined, WOE=True)
        model_measurement.plotKS(predictions_woe, Y_woe_test, 'LR+WOE')
        model_measurement.coef(clf, X_woe_train1, Y_woe_train)
        # model_measurement.plot_learning_curve(clf, 'Learning Curve of Logistics', X_woe, Y_woe)

def NN_woe_mod(X_woe_train, Y_woe_train, X_woe_test, Y_woe_test, loss, intermediate_dim, epochs,
               batch_size, dropout, activation, output_activation, show_result):
    X_woe_train_NN, X_woe_test_NN, Y_woe_train_NN, Y_woe_test_NN = NN.NN_processing(
        X_woe_train.values, X_woe_test.values, Y_woe_train.values, Y_woe_test.values)
    model = NN.NN_main_func(X_woe_train_NN.shape[1], intermediate_dim, loss=loss, dropout=dropout,
                            activation=activation, output_activation=output_activation)
    model.fit(X_woe_train_NN, Y_woe_train_NN, epochs=epochs, batch_size=batch_size,
              validation_data=(X_woe_test_NN, Y_woe_test_NN), verbose=show_result)
    predictions_NN = model.predict(X_woe_test_NN)
    model_measurement.plotROC(predictions_NN, Y_woe_test_NN, show=show_result, name='NN+WOE', method='NN')
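
# NN.NN_main_func is a project-local builder not shown here; a plausible equivalent is a
# small Keras MLP like the sketch below. The single hidden layer, single output unit, and
# the Adam optimizer are assumptions -- only the input_dim / intermediate_dim / dropout /
# activation knobs mirror the call above.
def _nn_builder_sketch(input_dim, intermediate_dim, loss, dropout, activation, output_activation):
    from tensorflow.keras import Sequential
    from tensorflow.keras.layers import Dense, Dropout
    model = Sequential([
        Dense(intermediate_dim, activation=activation, input_shape=(input_dim,)),
        Dropout(dropout),                        # regularization between layers
        Dense(1, activation=output_activation),  # single sigmoid-style output unit
    ])
    model.compile(optimizer='adam', loss=loss, metrics=['accuracy'])
    return model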

def logistics_mod_xgb_woe_SMOTE(X_woe_train, Y_woe_train, X_woe_test, Y_woe_test, minority, combined,
                                show_result, nthread, learning_rate, n_estimators, max_depth, gamma,
                                subsample, colsample_bytree):
    """XGBoost leaf encoding feeding a logistic regression, on WOE features oversampled with SMOTE."""
    X_woe_train_SMOTE, Y_woe_train_SMOTE, Transdata_woe_train_SMOTE = processing.data_SMOTE(
        X_woe_train, Y_woe_train, minority=minority, combined=combined, WOE=True)
    mod_xgb_woe_SMOTE = xgb.XGBClassifier(
        nthread=nthread,                    # -1 uses all CPUs (default); 1 runs single-threaded
        learning_rate=learning_rate,        # step size per boosting round; default 0.3, typical 0.01-0.2 (smaller trains slower)
        n_estimators=n_estimators,          # total boosting rounds, i.e. the number of trees
        max_depth=max_depth,                # tree depth; default 6, typical 3-10 (deeper overfits, shallower underfits)
        gamma=gamma,                        # minimum loss reduction required to split a node (regularization)
        subsample=subsample,                # fraction of training rows per tree; default 1, typical 0.5-1 (guards against overfitting)
        colsample_bytree=colsample_bytree)  # fraction of features per tree; default 1, typical 0.5-1 (guards against overfitting)
    mod_xgb_woe_SMOTE.fit(X_woe_train_SMOTE, Y_woe_train_SMOTE)
    # apply() returns, per tree, the index of the leaf each sample falls into.
    X_train_leaves = mod_xgb_woe_SMOTE.apply(X_woe_train_SMOTE)
    X_test_leaves = mod_xgb_woe_SMOTE.apply(X_woe_test.values)
    train_rows = X_train_leaves.shape[0]
    # One-hot encode train and test leaf indices together so both share one coding.
    X_leaves = np.concatenate((X_train_leaves, X_test_leaves), axis=0).astype(np.int32)
    XGBencoder = OneHotEncoder()
    X_trans = XGBencoder.fit_transform(X_leaves)
    # Stack the encoded leaves with the original WOE features.
    X_train_xgb = hstack([X_trans[:train_rows, :], X_woe_train_SMOTE.astype(np.float64)])
    X_test_xgb = hstack([X_trans[train_rows:, :], X_woe_test.astype(np.float64)])
    logistics_mod_xgb = linear_model.LogisticRegression(solver='liblinear', C=1.0, tol=1e-6)  # penalty='l2' is the default
    logistics_mod_xgb.fit(X_train_xgb, Y_woe_train_SMOTE)
    predictions_xgblr = logistics_mod_xgb.predict_proba(X_test_xgb)
    model_measurement.plotROC(predictions_xgblr, Y_woe_test, name='XGBlr+WOE+SMOTE', show=show_result)
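
# The leaf-encoding trick used above, in isolation: fit a boosted-tree model, map each
# sample to the leaf it reaches in every tree (apply()), one-hot encode those leaf
# indices, and train a linear model on the encoding stacked with the raw features.
# A minimal, self-contained sketch on synthetic data; the sample sizes and
# hyperparameters are illustrative assumptions, not values from this project.
def _xgb_leaf_lr_demo():
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import roc_auc_score
    X, y = make_classification(n_samples=2000, n_features=10, random_state=0)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0)
    booster = xgb.XGBClassifier(n_estimators=50, max_depth=3, learning_rate=0.1)
    booster.fit(X_tr, y_tr)
    # (n_samples, n_trees) matrix of leaf indices; encode train and test together.
    leaves = np.concatenate((booster.apply(X_tr), booster.apply(X_te)), axis=0).astype(np.int32)
    leaves_oh = OneHotEncoder().fit_transform(leaves)  # sparse one-hot leaf matrix
    n_tr = X_tr.shape[0]
    lr = linear_model.LogisticRegression(solver='liblinear')
    lr.fit(hstack([leaves_oh[:n_tr], X_tr]), y_tr)     # leaves + raw features, as above
    scores = lr.predict_proba(hstack([leaves_oh[n_tr:], X_te]))[:, 1]
    print('AUC:', roc_auc_score(y_te, scores))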

def SVM(X_train, Y_train, X_test, Y_test, kernel, C, drop_feature, show_result):
    grid_svc_best = svm.SVC(C=C, kernel=kernel, gamma='auto')
    X_train1 = X_train.drop(drop_feature, axis=1)
    grid_svc_best.fit(X_train1.values, Y_train.values)
    X_test1 = X_test.drop(drop_feature, axis=1)
    pred_SVM = grid_svc_best.predict(X_test1.values)
    model_measurement.plotROC(pred_SVM, Y_test, name='SVM', method='SVM', show=show_result)
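
# Note on the SVM evaluations in this module: SVC.predict returns hard 0/1 labels, so the
# ROC curves for the SVM variants are built from labels rather than continuous scores.
# A score-based alternative (a sketch, not what this pipeline does) uses the signed
# distance to the separating hyperplane:
def _svm_scores_sketch(fitted_svc, X):
    # Larger positive values lie further on the positive side of the margin; these can
    # be fed to a ROC routine in place of hard predictions.
    return fitted_svc.decision_function(X)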

def NN_SMOTE_mod(X_train, Y_train, X_test, Y_test, minority, combined, loss, intermediate_dim,
                 epochs, batch_size, dropout, activation, output_activation, show_result):
    X_train_SMOTE, Y_train_SMOTE, Transdata_train_SMOTE = processing.data_SMOTE(
        X_train, Y_train, minority=minority, combined=combined, WOE=False)
    X_train_NN, X_test_NN, Y_train_NN, Y_test_NN = NN.NN_processing(
        X_train_SMOTE, X_test.values, Y_train_SMOTE, Y_test.values)
    model = NN.NN_main_func(X_train_NN.shape[1], intermediate_dim, loss=loss, dropout=dropout,
                            activation=activation, output_activation=output_activation)
    model.fit(X_train_NN, Y_train_NN, epochs=epochs, batch_size=batch_size,
              validation_data=(X_test_NN, Y_test_NN), verbose=show_result)
    predictions_NN = model.predict(X_test_NN)
    model_measurement.plotROC(predictions_NN, Y_test_NN, show=show_result, name='NN+SMOTE', method='NN')

def SVM_woe(X_woe_train, X_woe_test, Y_woe_train, Y_woe_test, kernel, C, drop_feature, show_result):
    grid_svc_best = svm.SVC(C=C, kernel=kernel, gamma='auto')
    X_woe_train1 = X_woe_train.drop(drop_feature, axis=1)
    grid_svc_best.fit(X_woe_train1, Y_woe_train)
    X_woe_test1 = X_woe_test.drop(drop_feature, axis=1)
    pred_SVM_woe = grid_svc_best.predict(X_woe_test1)
    model_measurement.plotROC(pred_SVM_woe, Y_woe_test, name='SVM+WOE', method='SVM+WOE', show=show_result)

def SVM_woe_SMOTE(X_woe_train, X_woe_test, Y_woe_train, Y_woe_test, kernel, C, drop_feature,
                  minority, combined, show_result):
    X_woe_train_SMOTE, Y_woe_train_SMOTE, Transdata_woe_train_SMOTE = processing.data_SMOTE(
        X_woe_train, Y_woe_train, minority=minority, combined=combined, WOE=True)
    grid_svc_best = svm.SVC(C=C, kernel=kernel, gamma='auto')
    # Drop the label column ('是否是诈骗电话' = is-fraud-call) plus the excluded features.
    X_woe_train_SMOTE1 = Transdata_woe_train_SMOTE.drop(['是否是诈骗电话'] + drop_feature, axis=1)
    grid_svc_best.fit(X_woe_train_SMOTE1, Y_woe_train_SMOTE)
    X_woe_test1 = X_woe_test.drop(drop_feature, axis=1)
    pred_SVM_woe_SMOTE = grid_svc_best.predict(X_woe_test1)
    model_measurement.plotROC(pred_SVM_woe_SMOTE, Y_woe_test, name='SVM+WOE+SMOTE',
                              method='SVM+WOE+SMOTE', show=show_result)

def mod_xgb_woe(X_woe_train, Y_woe_train, X_woe_test, Y_woe_test, show_result, nthread,
                learning_rate, n_estimators, max_depth, gamma, subsample, colsample_bytree):
    """Plain XGBoost classifier on WOE-encoded features."""
    # Hyperparameter meanings are as documented in logistics_mod_xgb_woe_SMOTE above.
    mod_xgb_woe = xgb.XGBClassifier(nthread=nthread,
                                    learning_rate=learning_rate,
                                    n_estimators=n_estimators,
                                    max_depth=max_depth,
                                    gamma=gamma,
                                    subsample=subsample,
                                    colsample_bytree=colsample_bytree)
    mod_xgb_woe.fit(X_woe_train.values, Y_woe_train.values)
    predictions_xgb_WOE = mod_xgb_woe.predict_proba(X_woe_test.values)  # per-class probabilities
    model_measurement.plotROC(predictions_xgb_WOE, Y_woe_test, name='XGB+WOE', show=show_result)

def logistics_mod_woe_SMOTE(X_woe_train, X_woe_test, Y_woe_train, Y_woe_test, drop_feature,
                            minority, combined, show_result):
    """Logistic regression on WOE features, with SMOTE oversampling of the training set."""
    X_woe_train_SMOTE, Y_woe_train_SMOTE, Transdata_woe_train_SMOTE = processing.data_SMOTE(
        X_woe_train, Y_woe_train, minority=minority, combined=combined, WOE=True)
    # Show the class balance after oversampling ('是否是诈骗电话' is the fraud-call label).
    print(Transdata_woe_train_SMOTE.groupby(['是否是诈骗电话']).count())
    clf = linear_model.LogisticRegression(solver='liblinear', C=1.0, tol=1e-6, verbose=0)  # penalty='l2' is the default
    X_woe_train_SMOTE1 = Transdata_woe_train_SMOTE.drop(['是否是诈骗电话'] + drop_feature, axis=1)
    clf.fit(X_woe_train_SMOTE1, Y_woe_train_SMOTE)
    X_woe_test1 = X_woe_test.drop(drop_feature, axis=1)
    predictions_woe_SMOTE = clf.predict_proba(X_woe_test1)  # per-class probabilities
    model_measurement.plotROC(predictions_woe_SMOTE, Y_woe_test, 'LR+WOE+SMOTE', show=show_result)
    if show_result:
        model_measurement.plotKS(predictions_woe_SMOTE, Y_woe_test, 'LR+WOE+SMOTE')
        model_measurement.VIF(X_woe_train_SMOTE1, name='X_woe_train_SMOTE1', combined=combined, WOE=True)
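
# processing.data_SMOTE is project-local; conceptually it balances the classes by
# synthesizing new minority-class samples. A minimal equivalent with imbalanced-learn
# (an assumption about the underlying technique, not this project's exact code):
def _smote_sketch(X, y):
    from imblearn.over_sampling import SMOTE
    # SMOTE interpolates between minority-class neighbors to create synthetic rows.
    X_res, y_res = SMOTE(random_state=0).fit_resample(X, y)
    return X_res, y_res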

def SVM_grid(X_train, Y_train, X_test, Y_test, grid_kernel, grid_C, n_jobs, drop_feature, show_result):
    """Grid-search the best C and kernel for a (possibly non-linear) SVM, then evaluate it."""
    parameters = {'kernel': grid_kernel, 'C': grid_C}
    grid_svc = model_selection.GridSearchCV(estimator=svm.SVC(gamma='auto'), param_grid=parameters,
                                            scoring='roc_auc', cv=5, verbose=1, n_jobs=n_jobs)
    # Fit the search on the training set.
    X_train1 = X_train.drop(drop_feature, axis=1)
    grid_svc.fit(X_train1, Y_train)
    print('the best params:{}, the best training AUC:{:0.3f}'.format(grid_svc.best_params_, grid_svc.best_score_))
    # Refit with the winning parameters (grid_svc.best_estimator_ would also do, since refit=True).
    grid_svc_best = svm.SVC(C=grid_svc.best_params_['C'], kernel=grid_svc.best_params_['kernel'], gamma='auto')
    grid_svc_best.fit(X_train1, Y_train)
    X_test1 = X_test.drop(drop_feature, axis=1)
    pred_SVM = grid_svc_best.predict(X_test1)
    model_measurement.plotROC(pred_SVM, Y_test, name='SVM(Grid)', method='SVM', show=show_result)

def logistics_mod_SMOTE(X_train, Y_train, X_test, Y_test, drop_feature, minority, combined, show_result):
    """Logistic regression on raw features, with SMOTE oversampling of the training set."""
    X_train_SMOTE, Y_train_SMOTE, Transdata_train_SMOTE = processing.data_SMOTE(
        X_train, Y_train, minority=minority, combined=combined, WOE=False)
    print(Transdata_train_SMOTE.groupby(['是否是诈骗电话']).count())  # class balance after oversampling
    clf = linear_model.LogisticRegression(solver='liblinear', C=1.0, tol=1e-6, verbose=0)  # penalty='l2' is the default
    if combined:
        X_train1_SMOTE = Transdata_train_SMOTE.drop(
            ['是否是诈骗电话', '主叫_离散率组合_(1.0, 1.0)', '归属地是否未知_0.0'] + drop_feature, axis=1)
        X_test1 = X_test.drop(drop_feature + ['主叫_离散率组合_(1.0, 1.0)', '归属地是否未知_0.0'], axis=1)
    else:
        X_train1_SMOTE = Transdata_train_SMOTE.drop(['是否是诈骗电话'] + drop_feature, axis=1)
        X_test1 = X_test.drop(drop_feature, axis=1)
    clf.fit(X_train1_SMOTE, Y_train_SMOTE)
    predictions_SMOTE = clf.predict_proba(X_test1)  # per-class probabilities
    model_measurement.plotROC(predictions_SMOTE, Y_test, 'LR+SMOTE', show=show_result)
    if show_result:
        model_measurement.plotKS(predictions_SMOTE, Y_test, 'LR+SMOTE')
        model_measurement.VIF(X_train1_SMOTE, name='X_train1_SMOTE', combined=combined, WOE=False)
        model_measurement.coef(clf, X_train1_SMOTE, Y_train_SMOTE)

def SVM_woe_SMOTE_grid(X_woe_train, X_woe_test, Y_woe_train, Y_woe_test, grid_kernel, grid_C,
                       n_jobs, drop_feature, minority, combined, show_result):
    """Grid-search the best C and kernel for an SVM on SMOTE-oversampled WOE features."""
    X_woe_train_SMOTE, Y_woe_train_SMOTE, Transdata_woe_train_SMOTE = processing.data_SMOTE(
        X_woe_train, Y_woe_train, minority=minority, combined=combined, WOE=True)
    parameters = {'kernel': grid_kernel, 'C': grid_C}
    grid_svc = model_selection.GridSearchCV(estimator=svm.SVC(gamma='auto'), param_grid=parameters,
                                            scoring='roc_auc', cv=5, verbose=1, n_jobs=n_jobs)
    # Fit the search on the oversampled training set (label column dropped).
    X_woe_train_SMOTE1 = Transdata_woe_train_SMOTE.drop(['是否是诈骗电话'] + drop_feature, axis=1)
    grid_svc.fit(X_woe_train_SMOTE1, Y_woe_train_SMOTE)
    print('the best params:{}, the best training AUC:{:0.3f}'.format(grid_svc.best_params_, grid_svc.best_score_))
    grid_svc_best = svm.SVC(C=grid_svc.best_params_['C'], kernel=grid_svc.best_params_['kernel'], gamma='auto')
    grid_svc_best.fit(X_woe_train_SMOTE1, Y_woe_train_SMOTE)
    X_woe_test1 = X_woe_test.drop(drop_feature, axis=1)
    pred_SVM_woe_SMOTE = grid_svc_best.predict(X_woe_test1)
    model_measurement.plotROC(pred_SVM_woe_SMOTE, Y_woe_test, name='SVM+WOE+SMOTE(Grid)',
                              method='SVM+WOE+SMOTE', show=show_result)
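
# A minimal smoke test for the WOE logistic pipeline (a sketch: the synthetic data,
# column names, and empty drop_feature list are illustrative assumptions -- the real
# pipeline feeds WOE-encoded call-record features -- and it assumes the local
# model_measurement module is importable).
if __name__ == '__main__':
    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split
    X, y = make_classification(n_samples=1000, n_features=6, weights=[0.9, 0.1], random_state=0)
    X = pd.DataFrame(X, columns=['f%d' % i for i in range(6)])
    y = pd.Series(y, name='是否是诈骗电话')
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.3, random_state=0, stratify=y)
    logistics_mod_woe(X_tr, X_te, y_tr, y_te, drop_feature=[], combined=False,
                      show_COR=False, show_result=False)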