Beispiel #1
0
class ModelDetails:
    """Minimal stand-in for a fitted scikit-learn ``LogisticRegression``.

    Because the scorecardpy package can only take a model class of
    LogisticRegression from the scikit-learn package, this class is needed
    to hold the values from the statsmodels package under the same
    attribute names (``intercept_`` and ``coef_``).
    """

    def __init__(self, intercept, coefs):
        """Store the fitted parameters.

        Args:
            intercept: Intercept of the fitted model.
            coefs: Coefficients in feature order. Either an object exposing
                ``tolist()`` (pandas Series, NumPy array) or any plain
                iterable such as a list or tuple.
        """
        # Both attributes are wrapped in an outer list, one entry per model.
        self.intercept_ = [intercept]
        # Prefer tolist() so array scalars become plain Python numbers; fall
        # back to list() so callers may also pass a list or tuple directly.
        to_list = getattr(coefs, "tolist", None)
        self.coef_ = [to_list() if callable(to_list) else list(coefs)]


# Wrap the statsmodels fit so scorecardpy can read intercept_/coef_ from it.
# Positional access goes through .iloc: fit.params is label-indexed, and
# integer-key lookup such as params[0] on such a Series is deprecated in pandas.
# NOTE(review): assumes the first parameter is the intercept (constant column
# first), matching the X_train.columns[1:] used below -- confirm.
model = ModelDetails(fit.params.iloc[0], fit.params.iloc[1:])

# Create scorecard
card = sc.scorecard(bins, model, X_train.columns[1:], points0=800, pdo=50)

# Create scores
train_score = sc.scorecard_ply(train, card, print_step=0)
test_score = sc.scorecard_ply(test, card, print_step=0)

# Population stability index (PSI) of the scores between train and test
sc.perf_psi(
    score={'train': train_score, 'test': test_score},
    label={'train': y_train, 'test': y_test},
)
Beispiel #2
0
# Split the WOE-transformed frames into the target column and the features.
y_train = train_woe.loc[:, 'creditability']
X_train = train_woe.loc[:, train_woe.columns != 'creditability']
y_test = test_woe.loc[:, 'creditability']
# The mask must come from test_woe's own columns: a mask built from
# train_woe.columns selects the wrong columns (or fails) as soon as the two
# frames differ in column order or count.
X_test = test_woe.loc[:, test_woe.columns != 'creditability']

# logistic regression ------
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(penalty='l1', C=0.9, solver='saga', n_jobs=-1)
lr.fit(X_train, y_train)
# lr.coef_
# lr.intercept_

# predicted probability (column 1 of predict_proba)
train_pred = lr.predict_proba(X_train)[:, 1]
test_pred = lr.predict_proba(X_test)[:, 1]

# performance ks & roc ------
train_perf = sc.perf_eva(y_train, train_pred, title="train")
test_perf = sc.perf_eva(y_test, test_pred, title="test")

# score ------
card = sc.scorecard(bins_adj, lr, X_train.columns)
# credit score
train_score = sc.scorecard_ply(train, card, print_step=0)
test_score = sc.scorecard_ply(test, card, print_step=0)

# psi
sc.perf_psi(
    score={'train': train_score, 'test': test_score},
    label={'train': y_train, 'test': y_test},
)
Beispiel #3
0
    def evaluate_main(self):
        """Build the scorecard, evaluate every data split and write the report.

        Writes ``<filename>_report.xlsx`` with these sheets:

        * ``card_result`` -- all scorecard tables stacked into one frame;
        * ``train_result`` / ``test_result`` / ``ott_result`` -- the score
          table returned by ``self._get_score_table`` for each split;
        * ``feature_importance`` -- the rows of ``self.iv_df`` for the final
          features plus their model coefficient.

        Also saves one performance figure per split as
        ``<filename>_<split>.png`` and stores the predicted probabilities on
        ``self.train_pred`` / ``self.test_pred`` / ``self.ott_pred``.

        The test and ott splits are only processed when their raw frame
        contains at least one truthy value (``df.any().any()``). A failure
        while evaluating the ott split is logged and does not abort the report.
        """
        target = self.target_name

        # Base odds = count of label 1 / count of label 0 in the training target.
        class_counts = self.df_train_woe[target].value_counts()
        odds0 = float(class_counts[1]) / float(class_counts[0])
        # Score scaling: b is points per doubling of the odds, a the offset
        # so that odds0 corresponds to base_score.
        b = self.double_score / np.log(2)
        a = self.base_score + b * np.log(odds0)
        card = sc.scorecard(self.bins_adj, self.model, self.final_features,
                            points0=self.base_score,
                            odds0=odds0,
                            pdo=self.double_score)

        # The context manager saves and closes the workbook even when one of
        # the steps below raises.
        with pd.ExcelWriter("{}_report.xlsx".format(self.filename)) as writer:
            # One concat over all tables instead of growing a frame in a loop.
            card_frames = list(card.values())
            if card_frames:
                card_df = pd.concat(card_frames)
            else:
                card_df = pd.DataFrame(columns=["variable", "bin", "points"])
            card_df.to_excel(writer, sheet_name='card_result')

            self.train_pred = self.model.predict_proba(
                self.df_train_woe[self.final_features])[:, 1]
            self._export_split_report(writer, card, a, b, "train",
                                      self.df_train, self.df_train_woe,
                                      self.train_pred)

            if self.df_test.any().any():
                self.test_pred = self.model.predict_proba(
                    self.df_test_woe[self.final_features])[:, 1]
                self._export_split_report(writer, card, a, b, "test",
                                          self.df_test, self.df_test_woe,
                                          self.test_pred)

            if self.df_ott.any().any():
                self.ott_pred = self.model.predict_proba(
                    self.df_ott_woe[self.final_features])[:, 1]
                try:
                    # The figure goes to "<filename>_ott.png"; it used to be
                    # written to "<filename>_test.png", overwriting the test plot.
                    self._export_split_report(writer, card, a, b, "ott",
                                              self.df_ott, self.df_ott_woe,
                                              self.ott_pred)
                except Exception as exc:
                    # Best effort for the ott split, but keep the cause visible.
                    self.log.info(
                        "Cannot caculation the ott data! {!r}".format(exc))

            importance = dict(zip(self.final_features, self.model.coef_[0]))

            # Copy the filtered rows so adding a column does not write into a
            # view of self.iv_df.
            iv_df = self.iv_df[
                self.iv_df['variable'].isin(self.final_features)].copy()
            iv_df["coef"] = iv_df["variable"].map(importance)
            iv_df.to_excel(writer, sheet_name='feature_importance')

        self.log.info("全部环节结束,请查看相关文件!")

    def _export_split_report(self, writer, card, a, b, name, raw_df, woe_df, pred):
        """Evaluate one data split and write its score table to the workbook.

        Args:
            writer: Open ``pd.ExcelWriter`` that receives sheet ``<name>_result``.
            card: Scorecard returned by ``sc.scorecard``.
            a: Score offset passed to ``self._get_score_table``.
            b: Score factor passed to ``self._get_score_table``.
            name: Split name; used for the plot title, the printed summary,
                the figure file name and the sheet name.
            raw_df: Raw data of the split, scored with ``sc.scorecard_ply``.
            woe_df: WOE data of the split; supplies the target column.
            pred: Predicted probabilities for the split.
        """
        flags = woe_df[self.target_name]

        perf = sc.perf_eva(flags, pred, title=name)
        print("On {}-data, the evaluation follows:\nks={}, auc={}, gini={}".format(
            name, perf["KS"], perf["AUC"], perf["Gini"]))
        perf["pic"].savefig("{}_{}.png".format(self.filename, name))

        _score = sc.scorecard_ply(raw_df, card, print_step=0)
        _score["flag"] = flags
        _score["pred"] = pred

        _rs = self._get_score_table(_score, a, b)
        _rs.to_excel(writer, sheet_name="{}_result".format(name))
Beispiel #4
0
         'NumberRealEstateLoansOrLines_woe', 'NumberOfDependents_woe'], axis=1)

    # Column 1 of predict_proba is kept as the predicted probability for the
    # train and test feature matrices.
    train_pred = lr.predict_proba(x_train)[:, 1]
    test_pred = lr.predict_proba(x_test)[:, 1]

    # # lr (statsmodels version)
    # import statsmodels.api as sm
    # X1 = sm.add_constant(x_train)
    # logit = sm.Logit(y_train, X1)
    # result = logit.fit()
    # print(result.summary())
    # X2 = sm.add_constant(x_test)
    # predict = result.predict(X2)

    # score ------
    # Build the scorecard from `cutoff`, the fitted model and the training
    # feature names. NOTE(review): points0/odds0/pdo/basepoints_eq0 are passed
    # straight to scorecardpy -- see its documentation for their meaning.
    card = sc.scorecard(cutoff, lr, x_train.columns, points0=600, odds0=1 / 20, pdo=20, basepoints_eq0=False)
    # NOTE(review): `column` is not used anywhere in the visible part of this
    # function -- confirm whether it is still needed.
    column = ['basepoints', 'RevolvingUtilizationOfUnsecuredLines', 'age', 'NumberOfTime30-59DaysPastDueNotWorse',
              'NumberOfTimes90DaysLate', 'NumberOfTime60-89DaysPastDueNotWorse']

    # Write every scorecard table to its own CSV file, creating the output
    # directory first when it does not exist yet.
    if not (os.path.exists('./data/card')): os.makedirs(os.path.join('./data/card'))
    for i in card.keys():
        card[i].to_csv('./data/card/{}.csv'.format(i), index=False)

    # performance ks & roc ------
    train_perf = sc.perf_eva(y_train, train_pred, title="train")
    test_perf = sc.perf_eva(y_test, test_pred, title="test")

    # credit score
    # only_total_score=False is passed so the result is not limited to the
    # total score (presumably per-variable points are included -- confirm).
    train_score = sc.scorecard_ply(train_data, card, print_step=0, only_total_score=False)
    test_score = sc.scorecard_ply(test_data, card, print_step=0, only_total_score=False)
Beispiel #5
0
def execute_data():
    """Score a single JSON request with the cached scorecard model.

    Reads ``paramsData`` from the request JSON, reloads the model when the
    requested model path differs from the cached one, converts the input to
    WOE values with the bins stored on the model, predicts a label, builds a
    scorecard and computes the total and per-feature scores.

    Returns:
        The ``jsonify`` response. On success a dict with ``code`` '00000',
        ``score``, ``label`` and ``featureScore``. On failure one of the
        module-level ``code10001`` (KeyError), ``code10002`` (ValueError or
        ``paramsData`` not a dict) or ``code10003`` (any other error) dicts
        with ``errorMsg`` filled in.
    """
    start = time.time()
    try:
        global modelPath, binsPath, bins, model
        logger.info('入参:%s', str(request.get_data()))
        request_data = request.get_json()  # parsed request body
        paramsJson = request_data['paramsData']
        modelFilePath = modelFilePathCheck(request_data)

        # Guard clause: anything but a JSON object is rejected up front.
        if not isinstance(paramsJson, dict):
            # NOTE(review): this mutates a module-level dict shared by all
            # requests; kept as in the original.
            code10002['errorMsg'] = '输入值错误:请传入json格式参数'
            return jsonify(code10002)

        # Convert numeric strings in the raw data to integers. Only existing
        # keys are reassigned, so mutating during iteration is safe.
        for key, value in paramsJson.items():
            if is_number(value):
                paramsJson[key] = int(value)
        logger.info("paramsJson:%s", paramsJson)
        # Build the single-row DataFrame.
        df = pd.json_normalize(paramsJson)

        if modelPath != modelFilePath:
            logger.info('调用模型路径发生变化,重新加载模型!')
            logger.info('global modelFilePath:%s', modelPath)
            logger.info('param modelFilePath:%s', modelFilePath)
            modelPath = modelFilePath
            # Load the model.
            # NOTE(security): joblib.load unpickles and can execute arbitrary
            # code; modelFilePathCheck must restrict the path to trusted files.
            model = joblib.load(modelFilePath)
        else:
            logger.info('调用模型路径未发生变化,使用缓存中模型。')
            logger.info('global modelFilePath:%s', modelPath)

        # Convert the raw data to WOE values.
        bins = model.bins
        df_woe = sc.woebin_ply(df, bins)
        # Predict the label.
        label = model.predict(df_woe)[0]

        # Build the scorecard.
        card = sc.scorecard(bins, model, df_woe.keys())
        # Score the raw data, keeping the per-variable points columns.
        score = sc.scorecard_ply(df,
                                 card,
                                 only_total_score=False,
                                 print_step=0)

        # Collect the score of every feature.
        if isinstance(card, dict):
            card_df = pd.concat(card, ignore_index=True)
        elif isinstance(card, pd.DataFrame):
            card_df = card.copy(deep=True)
        else:
            # Previously card_df was simply left unbound here, which surfaced
            # as an obscure UnboundLocalError.
            raise TypeError(
                'unsupported scorecard type: {}'.format(type(card).__name__))
        # x variables
        xs = card_df.loc[card_df.variable != 'basepoints',
                         'variable'].unique()
        # .iloc[0] takes the first (only) row regardless of the index labels.
        featureScore = {name: score[name + '_points'].iloc[0] for name in xs}

        result = {
            'code': '00000',
            'score': str(score['score'].iloc[0]),
            'label': str(label),
            'featureScore': featureScore,
        }
        end = time.time()
        logger.info("运行结果:%s,模型执行耗时:%s", result, end - start)
        return jsonify(result)

    except KeyError as e:
        logger.info(e)
        code10001['errorMsg'] = '输入特征错误:' + str(e)
        return jsonify(code10001)

    except ValueError as e:
        logger.info(e)
        code10002['errorMsg'] = '输入值错误:' + str(e)
        return jsonify(code10002)

    except Exception as e:
        # Unexpected failure: keep the traceback in the log.
        logger.exception(e)
        code10003['errorMsg'] = '未知错误:' + str(e)
        return jsonify(code10003)