Ejemplo n.º 1
0
def f_trainModelMain(train_path, test_path, index_name, target_name, userPath, modeltype, arithmetic):
    '''
    Main entry point that runs the requested model.

    :param train_path: training set (noted as "str DataFrame" by the original author)
    :param test_path: test set (noted as "str DataFrame" by the original author)
    :param index_name: index column
    :param target_name: name of the target-variable column
    :param userPath: user path, handed to f_genmodelCodeDict
    :param modeltype: short model name, 12 possible values:
                      [ccxboost,ccxgbm,ccxrf] + [demo,speed,accuracy,stable]
    :param arithmetic: one of [Xgboost,GBM,RF]
    :return: whatever the selected trainer returns (file path(s) of the best
             model result); an empty list when ``arithmetic`` is not recognised
    '''
    code_table = f_genmodelCodeDict(userPath)
    runner = ModelMain(train_path, test_path, index_name, target_name)

    # Pick the trainer first, then make a single call with the shared arguments.
    if arithmetic == 'Xgboost':
        trainer = f_xgboost
    elif arithmetic == 'GBM':
        trainer = f_gbm
    elif arithmetic == 'RF':
        trainer = f_rf
    else:
        # TODO: write this to the log instead of stdout
        print('错误码003 模型没有跑起来')
        return []

    return trainer(runner, modeltype, code_table)
Ejemplo n.º 2
0
def f_recursionrfModel(train_path, test_path, index_name, target_name, modelconf, i,optimizationType):
    '''
    Recursively feed the previous round's important variables back in as the
    new input, which has the effect of selecting variables.

    :param train_path: training set
    :param test_path: test set
    :param index_name: index column name
    :param target_name: target column name
    :param modelconf: path of the model configuration file
    :param i: number of recursion rounds completed so far
    :param optimizationType: forwarded unchanged to ModelMain.ccxrf_main
    :return: the list of result paths produced by the last round that was run
    '''
    train_data = ModelUtil.load_data(train_path)
    test_data = ModelUtil.load_data(test_path)
    runner = ModelMain(train_data, test_data, index_name, target_name)
    round_paths = runner.ccxrf_main(modelconf, optimizationType)

    # 1. Count the important variables (and fetch their names)
    imp_count, imp_vars = f_getImplen(round_paths[2])
    # 2. Compute the model's AUC and KS
    train_auc, train_ks = f_getAucKs(round_paths[3])
    test_auc, test_ks = f_getAucKs(round_paths[4])
    # 3. Share of important variables among all variables
    #    (original note: variables entering the model == important variables)
    imp_ratio = f_getVarpctrf(round_paths[1], imp_count)
    keep_going = f_flag(train_auc, train_ks, test_auc, test_ks, imp_ratio)

    depth = i + 1

    # Stop once the round budget is used up.
    if depth >= 5:
        print('递归次数达到要求结束递归' * 10)
        return round_paths

    # Stop when f_flag says no further round is needed.
    if not keep_going:
        print('满足条件结束递归 ' * 10)
        return round_paths

    print('递归调用 ' * 20)
    selected = imp_vars + [index_name, target_name]
    print('---入选模型的变量个数%s' % len(selected))
    next_train = ModelUtil.load_data(train_data)[selected]
    next_test = ModelUtil.load_data(test_data)[selected]
    print('##' * 20, depth, '##' * 20)
    # Future improvement (per original author): also adjust modelconf while recursing.
    return f_recursionrfModel(next_train, next_test, index_name, target_name, modelconf, depth,
                              optimizationType)
Ejemplo n.º 3
0
def f_trainModelMain(train_path, test_path, index_name, target_name, userPath, modeltype, arithmetic,optimizationType,is_auto,param):
    '''
    Main entry point that runs the requested model.

    :param train_path: training set (noted as "str DataFrame" by the original author)
    :param test_path: test set (noted as "str DataFrame" by the original author)
    :param index_name: index column
    :param target_name: name of the target-variable column
    :param userPath: user path, handed to f_genmodelCodeDict / create_conf
    :param modeltype: short model name, 24 possible values:
                      [grid,bayes] + [ccxboost,ccxgbm,ccxrf] + [demo,speed,accuracy,stable]
    :param arithmetic: one of [Xgboost,GBM,RF]
    :param optimizationType: one of [grid,bayes]
    :param is_auto: automatic-search switch; ``int(is_auto) == 0`` selects the
                    manual branch (only taken together with 'bayes')
    :param param: hyper-parameter range; passed to create_conf in the manual branch
    :return: whatever the selected trainer returns (file path(s) of the best
             model result); an empty list on an unknown ``arithmetic`` or when
             the manual branch raises
    '''
    code_table = f_genmodelCodeDict(userPath)
    runner = ModelMain(train_path, test_path, index_name, target_name)

    # is_auto == 0 means manual tuning, which only exists for the bayes mode.
    manual_bayes = int(is_auto) == 0 and optimizationType == 'bayes'
    if manual_bayes:
        try:
            # Build a config file from the supplied parameters, then run the
            # matching recursive trainer with it.
            conf_path = create_conf(arithmetic, param,userPath)
            if arithmetic == 'Xgboost':
                recurse = f_recursionboostModel
            elif arithmetic == 'GBM':
                recurse = f_recursiongbmModel
            elif arithmetic == 'RF':
                recurse = f_recursionrfModel
            else:
                # TODO: write this to the log instead of stdout
                print('错误码005 模型没有跑起来')
                return []
            return recurse(train_path, test_path, index_name, target_name, conf_path, 0,
                           optimizationType)
        except Exception as e:
            print('错误码004 手动寻参出错问题为:')
            print("ErrorMsg:\t"+str(e))
            return []

    # Default path: pick the trainer, then call it with the shared arguments.
    if arithmetic == 'Xgboost':
        trainer = f_xgboost
    elif arithmetic == 'GBM':
        trainer = f_gbm
    elif arithmetic == 'RF':
        trainer = f_rf
    else:
        # TODO: write this to the log instead of stdout
        print('错误码003 模型没有跑起来')
        return []

    return trainer(runner, modeltype, code_table,optimizationType)
Ejemplo n.º 4
0
    r'C:\Users\liyin\Desktop\CcxFpABSModel\updateModel\FPALLVAR_1103.csv')
# Attach the full variable table to the train / test index frames.
# NOTE(review): assuming `pd` is pandas, merge() with no `on=` joins on the
# columns shared by both frames (inner join by default) — confirm the key.
train = pd.merge(train_index, FPALLVAR)
test = pd.merge(test_index, FPALLVAR)

######
train_path = r'C:\Users\liyin\Desktop\CcxFpABSModel\updateModel\train_1103.csv'  # path to the training-set data; see the demo_data folder
test_path = r'C:\Users\liyin\Desktop\CcxFpABSModel\updateModel\test_1103.csv'  # path to the test-set data; see the demo_data folder

# train.to_csv(train_path, index=False)
# test.to_csv(test_path, index=False)
############
from ccxmodel.modelmain import ModelMain

index_name = 'lend_request_id'  # unique index of the dataset; exactly one index is supported, not several
target_name = 'TargetBad_P12'  # target variable
modelmain = ModelMain(train_path, test_path, index_name, target_name)

# Run the ccxboost pipeline with the given configuration file.
modelmain.ccxboost_main(
    r'C:\Users\liyin\Desktop\CcxFpABSModel\updateModel\ccxboost_1.conf')
r"""
{'colsample_bytree': 0.80000000000000004, 'eta': 0.10000000000000001, 'eval_metric': 'auc', 'gamma': 2, 'lambda': 500, 'max_depth': 4, 'max_test_auc': 0.71066600000000002, 'max_train_auc': 0.75396439999999987, 'min_child_weight': 2, 'num_best_round': 144, 'num_boost_round': 500, 'num_maxtest_round': 494, 'objective': 'binary:logistic', 'subsample': 0.80000000000000004, 'gap': 0.042999999999999997}

模型保存成功 文件路径名:C:\Users\liyin\Desktop\CcxFpABSModel\updateModel\model20171103214623/modeltxt/model_Fp_Ccx_All_2017-11-03.txt
重要变量的个数:145
数据保存成功:C:\Users\liyin\Desktop\CcxFpABSModel\updateModel\model20171103214623/modeldata/d_2017-11-03_importance_var.csv
训练集模型报告:
              precision    recall  f1-score   support
        0.0       0.86      1.00      0.92     12354
        1.0       0.77      0.02      0.04      2068
avg / total       0.85      0.86      0.80     14422
测试集模型报告: