Python DataTools.compute_score 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: tools.tools_data

클래스/타입: DataTools

메소드/함수: compute_score

hotexamples.com에서의 예제들: 3

Python DataTools.compute_score - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python tools.tools_data.DataTools.compute_score의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

data_split(9)

print_data_ratio(9)

compute_confusion_matrix(7)

compute_score_list(4)

compute_score(3)

예제 #1

파일 보기

파일: model_all.py 프로젝트: yoseng28/credit_fraud

    def model_all_own(clf, X_train, y_train, X_test, y_test):
        """Fit ``clf``, score it on the test split, and return the metrics.

        The classifier is fitted on ``X_train`` and ``y_train.values.ravel()``,
        then asked for class predictions and class probabilities on ``X_test``.
        The reported time spans the fit and both prediction calls.

        Returns:
            A ``(result, roc_pr)`` pair of dicts. ``result`` has the keys
            ``recall``, ``acc``, ``precision``, ``f1``, ``f5``, ``auc``,
            ``gmean`` and ``time``; ``roc_pr`` has ``fpr`` and ``tpr``. All
            metric values come straight from ``DataTools.compute_score``.
        """
        model_name = clf.__class__.__name__

        print('*******************************************')
        print(model_name, '开始fit...')

        started = time()
        clf.fit(X_train, y_train.values.ravel())
        predicted = clf.predict(X_test)
        predicted_proba = clf.predict_proba(X_test)
        elapsed = time() - started

        scores = DataTools.compute_score(y_test, predicted, predicted_proba)
        rec, acc, prec, f1_val, f5_val, auc_val, gmean_val, fpr, tpr = scores

        result = {
            'recall': rec,
            'acc': acc,
            'precision': prec,
            'f1': f1_val,
            'f5': f5_val,
            'auc': auc_val,
            'gmean': gmean_val,
            'time': elapsed,
        }
        roc_pr = {'fpr': fpr, 'tpr': tpr}

        print("{} 训练结束，耗时： {:.4f} ".format(model_name, elapsed))

        return result, roc_pr

예제 #2

파일 보기

파일: result_LR.py 프로젝트: yoseng28/credit_fraud

    def LR_EE_smote(data):
        """Train one LR classifier per resampled subset and save the mean scores.

        The data is read with ``DataPreprocessing.read_X_y`` and split with
        ``DataTools.data_split``. For each of ``num_subsets`` rounds a new
        training subset is drawn with ``SmoteEE.smoteEE_own``, exported to
        ``data/subsets/lr_subset<k>.csv``, and used to obtain predictions and
        probabilities for the test split. The per-round scores from
        ``DataTools.compute_score`` are averaged and written to
        ``data/score/lr_ee_tuned.csv``.

        Args:
            data: Passed unchanged to ``DataPreprocessing.read_X_y``.

        Returns:
            None. Results are printed and written to CSV files.
        """
        # Number of resampled subsets (one classifier is evaluated per subset).
        num_subsets = 10
        X, y = DataPreprocessing.read_X_y(data)
        print('原始数据：')
        DataTools.print_data_ratio(y)
        X_train, X_test, y_train, y_test = DataTools.data_split(X, y)
        print('分割后的测试集：')
        DataTools.print_data_ratio(y_test)
        print('分割后的训练集：')
        DataTools.print_data_ratio(y_train)

        result = {}
        result_recall = []
        result_acc = []
        result_precision = []
        result_f1 = []
        result_auc = []
        result_gmean = []
        result_fpr_temps = []
        result_tpr_temps = []

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start_time = time.perf_counter()
        for i in range(num_subsets):
            subset_no = i + 1
            print(
                '******************************************************************************'
            )
            print('第 ', subset_no, ' 个分类器开始：')

            # Training subset returned by SmoteEE.smoteEE_own for this round.
            X_ee_smote, y_ee_smote = SmoteEE.smoteEE_own(X_train, y_train)
            DataTools.print_data_ratio(y_ee_smote)

            # Hyper-parameter search is disabled here; to tune, call
            # ClassifierLR.lr_grid_search_cv(X_ee_smote, y_ee_smote).

            pd.concat([X_ee_smote, y_ee_smote],
                      axis=1).to_csv('data/subsets/lr_subset%d.csv' % subset_no)
            print('第%d个 子集导出成功！' % subset_no)
            print('训练集子集%d：' % subset_no)
            DataTools.print_data_ratio(y_ee_smote)

            y_predict = ClassifierLR.fit_model_LR(X_ee_smote, y_ee_smote,
                                                  X_test, 0.1)
            y_predict_prob = ClassifierLR.lr_predict_proba(
                X_ee_smote, y_ee_smote, X_test, 0.1)

            # NOTE(review): some versions of compute_score appear to return an
            # additional f5 value between f1 and auc (TODO confirm). Starred
            # unpacking accepts both the 8- and 9-value shapes instead of
            # raising ValueError; the extra value, if present, is ignored.
            (recall_, accuracy_, precision_, f1_, *_unused,
             auc_, g_mean_, fpr_, tpr_) = DataTools.compute_score(
                 y_test, y_predict, y_predict_prob)
            result_recall.append(recall_)
            result_acc.append(accuracy_)
            result_precision.append(precision_)
            result_f1.append(f1_)
            result_auc.append(auc_)
            result_gmean.append(g_mean_)
            result_fpr_temps.append(fpr_)
            result_tpr_temps.append(tpr_)

        end_time = time.perf_counter()
        result['time'] = end_time - start_time
        result['recall'] = np.mean(result_recall)
        result['acc'] = np.mean(result_acc)
        result['precision'] = np.mean(result_precision)
        result['f1'] = np.mean(result_f1)
        result['auc'] = np.mean(result_auc)
        result['gmean'] = np.mean(result_gmean)
        # One row per round; .mean() averages column-wise across the rounds.
        result['fpr'] = pd.DataFrame(result_fpr_temps).mean()
        result['tpr'] = pd.DataFrame(result_tpr_temps).mean()
        pd.DataFrame(result).to_csv('data/score/lr_ee_tuned.csv')
        print('结果已保存至score文件夹下 ^_^')

예제 #3

파일 보기

파일: result_XGB.py 프로젝트: yoseng28/credit_fraud

    def XGB_EE(data):
        """Evaluate XGBoost on EE-resampled subsets, save mean scores, and plot.

        The data is read with ``DataPreprocessing.read_X_y`` and split twice
        with ``DataTools.data_split``: first into train/test, then the train
        part again (the second half of that split is not used here). For each
        of ``num_subsets`` rounds a subset is drawn with ``EE.ee_own``,
        exported to ``data/subsets/subset_ee<k>.csv``, and used to obtain
        predictions and probabilities for the test split. The per-round scores
        from ``DataTools.compute_score`` are averaged and written to
        ``data/score2/xgb_ee3.csv``. Finally a confusion matrix and a ROC
        curve are plotted.

        Args:
            data: Passed unchanged to ``DataPreprocessing.read_X_y``.

        Returns:
            None. Results are printed, written to CSV files, and plotted.
        """
        # Number of resampled subsets (one classifier is evaluated per subset).
        num_subsets = 5
        X, y = DataPreprocessing.read_X_y(data)
        X_train_tmp, X_test, y_train_tmp, y_test = DataTools.data_split(X, y)
        # The validation part of the second split is not used in this
        # function; the call is kept so X_train / y_train stay the same.
        X_train, _X_validate, y_train, _y_validate = DataTools.data_split(
            X_train_tmp, y_train_tmp)

        result = {}
        result_recall = []
        result_acc = []
        result_precision = []
        result_f1 = []
        result_f5 = []
        result_auc = []
        result_gmean = []
        result_fpr_temps = []
        result_tpr_temps = []

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start_time = time.perf_counter()
        for i in range(num_subsets):
            subset_no = i + 1
            print(
                '******************************************************************************'
            )
            print('第 ', subset_no, ' 个分类器开始：')

            # Training subset returned by EE.ee_own for this round.
            X_ee, y_ee = EE.ee_own(X_train, y_train)
            pd.concat([X_ee, y_ee],
                      axis=1).to_csv('data/subsets/subset_ee%d.csv' % subset_no)
            print('第%d个 子集导出成功！' % subset_no)
            print('训练集子集%d：' % subset_no)
            DataTools.print_data_ratio(y_ee)

            # Hyper-parameter search is disabled here; to tune, call
            # ModelXGB.xgb_cv_param(X_ee, y_ee) or
            # ModelXGB.xgb_gridSearchCV(X_ee, y_ee) and return early.

            y_predict = ModelXGB.xgb_predict(X_ee, y_ee, X_test)
            y_predict_prob = ModelXGB.xgb_predict_prob(X_ee, y_ee, X_test)
            recall_, accuracy_, precision_, f1_, f5_, auc_, g_mean_, fpr_, tpr_ = \
                DataTools.compute_score(y_test, y_predict, y_predict_prob)
            result_recall.append(recall_)
            result_acc.append(accuracy_)
            result_precision.append(precision_)
            result_f1.append(f1_)
            result_f5.append(f5_)
            result_auc.append(auc_)
            result_gmean.append(g_mean_)
            result_fpr_temps.append(fpr_)
            result_tpr_temps.append(tpr_)

        end_time = time.perf_counter()
        result['time'] = end_time - start_time
        result['recall'] = np.mean(result_recall)
        result['acc'] = np.mean(result_acc)
        result['precision'] = np.mean(result_precision)
        result['f1'] = np.mean(result_f1)
        result['f5'] = np.mean(result_f5)
        result['auc'] = np.mean(result_auc)
        result['gmean'] = np.mean(result_gmean)
        # One row per round; .mean() averages column-wise across the rounds.
        result['fpr'] = pd.DataFrame(result_fpr_temps).mean()
        result['tpr'] = pd.DataFrame(result_tpr_temps).mean()
        pd.DataFrame(result).to_csv('data/score2/xgb_ee3.csv')
        print('结果已保存至score文件夹下 ^_^')

        # NOTE(review): y_predict / y_predict_prob below are whatever the LAST
        # loop iteration produced, so both plots describe only the final
        # subset's model, not the averaged scores saved above.
        # Compute the confusion matrix.
        cnf_matrix = DataTools.compute_confusion_matrix(y_test, y_predict)
        # Plot the confusion matrix.
        PlotTools.plot_confusion_matrix(cnf_matrix, title='Confusion matrix')

        PlotTools.plot_roc_curve(y_test, y_predict_prob[:, 1])